From 5eb31eb5fb5b4bd5dbea03dcb992d04829dd5b8b Mon Sep 17 00:00:00 2001 From: Darpan Date: Wed, 27 May 2026 12:23:17 +0530 Subject: [PATCH 1/4] [CALCITE-7551] Project/Filter/Join transpose and merge rules should not duplicate non-deterministic expressions (e.g. RAND()) # Conflicts: # core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java --- .../org/apache/calcite/plan/RelOptUtil.java | 15 +++++ .../rel/rules/FilterProjectTransposeRule.java | 7 ++ .../rel/rules/JoinProjectTransposeRule.java | 16 +++++ .../rules/SemiJoinProjectTransposeRule.java | 10 +++ .../apache/calcite/test/RelOptRulesTest.java | 66 +++++++++++++++++++ .../calcite/test/SqlToRelConverterTest.java | 9 +++ .../apache/calcite/test/RelOptRulesTest.xml | 46 +++++++++++++ .../calcite/test/SqlToRelConverterTest.xml | 12 ++++ 8 files changed, 181 insertions(+) diff --git a/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java b/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java index 9b24cf2ad201..4d4b67dad628 100644 --- a/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java +++ b/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java @@ -3346,6 +3346,21 @@ public static List pushPastProject(List nodes, // function? Possibly. But it's invalid SQL, so don't go there. return null; } + // [CALCITE-7551] Refuse to merge if it would duplicate a + // non-deterministic expression (e.g. RAND()). 
+ final List bottom = project.getProjects(); + final int[] refs = new int[bottom.size()]; + new RexVisitorImpl(true) { + @Override public Void visitInputRef(RexInputRef ref) { + refs[ref.getIndex()]++; + return null; + } + }.visitEach(nodes); + for (int i = 0; i < refs.length; i++) { + if (refs[i] > 1 && !RexUtil.isDeterministic(bottom.get(i))) { + return null; + } + } final List list = pushPastProject(nodes, project); final int bottomCount = RexUtil.nodeCount(project.getProjects()); final int topCount = RexUtil.nodeCount(nodes); diff --git a/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java b/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java index 3c365222cd66..94fef1ebf048 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java @@ -165,6 +165,13 @@ protected FilterProjectTransposeRule( // it can be pushed down. For now we don't support this. return; } + // Pushing the filter below the project would split a single + // non-deterministic evaluation (e.g. RAND()) into two: one consumed by + // the new filter condition, and the original still produced by the + // project above. Refuse to transpose in that case. 
+ if (!project.getProjects().stream().allMatch(RexUtil::isDeterministic)) { + return; + } // convert the filter to one that references the child of the project RexNode newCondition = RelOptUtil.pushPastProjectUnlessBloat(filter.getCondition(), project, config.bloat()); diff --git a/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java b/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java index 6902a5ef8054..c980269bdb44 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java @@ -36,6 +36,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexProgram; import org.apache.calcite.rex.RexProgramBuilder; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; @@ -151,6 +152,21 @@ public JoinProjectTransposeRule(RelOptRuleOperand operand, rightJoinChild = join.getRight(); } + // Skip projects that contain non-deterministic expressions + // (e.g. RAND). The merge below inlines projected expressions + // into the join condition via expandLocalRef, which would + // duplicate every non-deterministic call referenced more than once. 
+ if (leftProject != null + && !leftProject.getProjects().stream().allMatch(RexUtil::isDeterministic)) { + leftProject = null; + leftJoinChild = join.getLeft(); + } + if (rightProject != null + && !rightProject.getProjects().stream().allMatch(RexUtil::isDeterministic)) { + rightProject = null; + rightJoinChild = join.getRight(); + } + if ((leftProject == null) && (rightProject == null)) { return; } diff --git a/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java b/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java index 42deabf5e3d6..8e341b36b013 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java @@ -31,6 +31,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexProgram; import org.apache.calcite.rex.RexProgramBuilder; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.Pair; @@ -72,6 +73,15 @@ protected SemiJoinProjectTransposeRule(Config config) { final Join semiJoin = call.rel(0); final Project project = call.rel(1); + // Skip when the project contains a non-deterministic expression + // (e.g. RAND). Pulling such a project above the semi-join inlines + // its expressions into the join condition via expandLocalRef and + // then re-emits the projection above, splitting one evaluation + // into many. See [CALCITE-7551]. + if (!project.getProjects().stream().allMatch(RexUtil::isDeterministic)) { + return; + } + // Convert the LHS semi-join keys to reference the child projection // expression; all projection expressions must be RexInputRefs, // otherwise, we wouldn't have created this semi-join. 
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java index 715d9f4f7b8e..d5f995b5b3bd 100644 --- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java @@ -1533,6 +1533,48 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) { .check(); } + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. JoinProjectTransposeRule must + * not pull a project containing a non-deterministic expression above + * the join, because it inlines the expression into the new join + * condition via {@code mergedProgram.expandLocalRef}. */ + @Test void testJoinProjectTransposeShouldIgnoreNonDeterministic() { + final Function relFn = b -> b + .scan("EMP") + .project(b.field("EMPNO"), + b.alias(b.call(SqlStdOperatorTable.RAND), "r")) + .scan("DEPT") + .join(JoinRelType.INNER, + b.and( + b.greaterThan(b.field(2, 0, "r"), b.literal(0.0)), + b.lessThan(b.field(2, 0, "r"), b.literal(1.0)))) + .build(); + relFn(relFn).withRule(CoreRules.JOIN_PROJECT_LEFT_TRANSPOSE).checkUnchanged(); + } + + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. SemiJoinProjectTransposeRule + * uses the same {@code mergePrograms} + {@code expandLocalRef} + * pattern as JoinProjectTransposeRule, and must not pull a project + * containing a non-deterministic expression above the semi-join. 
*/ + @Test void testSemiJoinProjectTransposeShouldIgnoreNonDeterministic() { + final Function relFn = b -> b + .scan("EMP") + .project(b.field("EMPNO"), + b.alias(b.call(SqlStdOperatorTable.RAND), "r")) + .scan("DEPT") + .join(JoinRelType.SEMI, + b.and( + b.greaterThan(b.field(2, 0, "r"), b.literal(0.0)), + b.lessThan(b.field(2, 0, "r"), b.literal(1.0)))) + .build(); + relFn(relFn).withRule(CoreRules.SEMI_JOIN_PROJECT_TRANSPOSE).checkUnchanged(); + } + /** Test case for * [CALCITE-1338] * JoinProjectTransposeRule should not pull a literal above the @@ -3204,6 +3246,18 @@ private void checkProjectCorrelateTransposeRuleSemiOrAntiCorrelate(JoinRelType t .check(); } + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. FilterProjectTransposeRule must + * not pull a filter that references a non-deterministic projected + * column below the project. */ + @Test void testFilterProjectTransposeShouldIgnoreNonDeterministic() { + final String sql = "select * from (select rand() as a from emp)\n" + + "where a > 0 and a < 1"; + sql(sql).withRule(CoreRules.FILTER_PROJECT_TRANSPOSE).checkUnchanged(); + } + private static final String NOT_STRONG_EXPR = "case when e.sal < 11 then 11 else -1 * e.sal end"; @@ -6920,6 +6974,18 @@ private HepProgram getTransitiveProgram() { sql(sql).withRule(CoreRules.PROJECT_MERGE).checkUnchanged(); } + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. ProjectMergeRule must not merge + * adjacent projects when doing so would duplicate a non-deterministic + * expression. 
*/ + @Test void testProjectMergeShouldIgnoreNonDeterministic() { + final String sql = "select a, a + 1 as b from (select rand() as a from emp)"; + sql(sql).withRule(CoreRules.PROJECT_MERGE).checkUnchanged(); + } + + @Test void testAggregateProjectPullUpConstants() { final String sql = "select job, empno, sal, sum(sal) as s\n" + "from emp where empno = 10\n" diff --git a/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java b/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java index 4e8d17cd7a27..5a2ac594a229 100644 --- a/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java +++ b/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java @@ -6179,4 +6179,13 @@ void checkUserDefinedOrderByOver(NullCollation nullCollation) { assertThat(plan, not(containsString("FLOOR(FLOOR"))); assertThat(plan, containsString("FLOOR($4, FLAG(WEEK))")); } + + /** Test case for + * [CALCITE-7551] + * Non-deterministic expressions (e.g. {@code RAND()}) should not be + * duplicated when projections are merged. 
*/ + @Test void testRandNotDuplicatedInProjectionMerge() { + final String sql = "select a, a + 1 as b from (select rand() as a)"; + sql(sql).ok(); + } } diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml index c782bb913cc1..6ed01795ab42 100644 --- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml +++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml @@ -5903,6 +5903,20 @@ LogicalAggregate(group=[{}], EXPR$0=[COUNT()]) LogicalTableScan(table=[[CATALOG, SALES, EMP]]) }))]) LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +]]> + + + + + 0 and a < 1]]> + + + ($0, CAST(0):DOUBLE NOT NULL), <($0, CAST(1):DOUBLE NOT NULL))]) + LogicalProject(A=[RAND()]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) ]]> @@ -8445,6 +8459,16 @@ LogicalProject(DEPTNO=[$0], NAME=[$1], NAME0=[$2], EXPR$1=[$3]) LogicalJoin(condition=[=($1, $3)], joinType=[left]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) +]]> + + + + + @@ -11947,6 +11971,18 @@ LogicalProject(EXPR$0=[+($0, 1)]) })]) LogicalProject(X=[ARRAY(1, 2, 3)]) LogicalValues(tuples=[[{ 0 }]]) +]]> + + + + + + + + @@ -17971,6 +18007,16 @@ LogicalProject(DNAME=[$1]) LogicalAggregate(group=[{0}]) LogicalProject($f0=[*(2, $0)]) LogicalTableScan(table=[[scott, DEPT]]) +]]> + + + + + diff --git a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml index 5ee7180efa75..5cbf6d1f1243 100644 --- a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml +++ b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml @@ -7277,6 +7277,18 @@ LogicalProject(EXPR$0=[= SOME(1970-01-01 01:23:45, ARRAY(1970-01-01 01:23:45, 19 + + + + + + + + From 9a3f9318a70ea4ccdd7a5becaeaeffdfa609f51a Mon Sep 17 00:00:00 2001 From: Darpan Date: Thu, 
28 May 2026 10:13:26 +0530 Subject: [PATCH 2/4] [CALCITE-7551] Only block filter-project transpose when the filter references a non-deterministic projected column --- .../rel/rules/FilterProjectTransposeRule.java | 17 +++++---- .../apache/calcite/test/RelOptRulesTest.java | 35 +++++++++++++++++-- .../apache/calcite/test/RelOptRulesTest.xml | 25 +++++++++---- 3 files changed, 61 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java b/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java index 94fef1ebf048..18b2aa9a5ff3 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java @@ -165,12 +165,17 @@ protected FilterProjectTransposeRule( // it can be pushed down. For now we don't support this. return; } - // Pushing the filter below the project would split a single - // non-deterministic evaluation (e.g. RAND()) into two: one consumed by - // the new filter condition, and the original still produced by the - // project above. Refuse to transpose in that case. - if (!project.getProjects().stream().allMatch(RexUtil::isDeterministic)) { - return; + // Refuse to transpose if the filter references a projected column whose + // expression is non-deterministic (e.g. RAND()). Pushing the filter + // below the project would inline that expression into the new filter + // condition while the original is still produced by the project above, + // splitting one evaluation into two. References to deterministic columns + // (even when other columns are non-deterministic) are safe to push. 
+ final List projects = project.getProjects(); + for (int ref : RelOptUtil.InputFinder.bits(filter.getCondition())) { + if (!RexUtil.isDeterministic(projects.get(ref))) { + return; + } } // convert the filter to one that references the child of the project RexNode newCondition = diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java index d5f995b5b3bd..4a89b831660a 100644 --- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java @@ -3253,9 +3253,38 @@ private void checkProjectCorrelateTransposeRuleSemiOrAntiCorrelate(JoinRelType t * not pull a filter that references a non-deterministic projected * column below the project. */ @Test void testFilterProjectTransposeShouldIgnoreNonDeterministic() { - final String sql = "select * from (select rand() as a from emp)\n" - + "where a > 0 and a < 1"; - sql(sql).withRule(CoreRules.FILTER_PROJECT_TRANSPOSE).checkUnchanged(); + // Filter(a > 0 AND a < 1) over Project(a = RAND()). The filter + // references the non-deterministic column, so the transpose must be + // refused: otherwise the pushed-down filter and the re-emitted project + // would each evaluate RAND() independently. + final Function relFn = b -> b + .scan("EMP") + .project(b.alias(b.call(SqlStdOperatorTable.RAND), "a")) + .filter( + b.and( + b.greaterThan(b.field("a"), b.literal(0.0)), + b.lessThan(b.field("a"), b.literal(1.0)))) + .build(); + relFn(relFn).withRule(CoreRules.FILTER_PROJECT_TRANSPOSE).checkUnchanged(); + } + + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. The transpose is still allowed when + * the filter only references deterministic projected columns, even if + * other columns in the project are non-deterministic (here {@code r} is + * RAND() but the filter is on {@code b}). 
*/ + @Test void testFilterProjectTransposeWithUnrelatedNonDeterministic() { + // Filter(b > 0) over Project(r = RAND(), b = DEPTNO). The filter only + // references the deterministic column b, so the transpose is safe. + final Function relFn = b -> b + .scan("EMP") + .project(b.alias(b.call(SqlStdOperatorTable.RAND), "r"), + b.alias(b.field("DEPTNO"), "b")) + .filter(b.greaterThan(b.field("b"), b.literal(0))) + .build(); + relFn(relFn).withRule(CoreRules.FILTER_PROJECT_TRANSPOSE).check(); } private static final String NOT_STRONG_EXPR = diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml index 6ed01795ab42..5cbb81e22a35 100644 --- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml +++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml @@ -5907,16 +5907,27 @@ LogicalAggregate(group=[{}], EXPR$0=[COUNT()]) - - 0 and a < 1]]> + + + + ($0, CAST(0):DOUBLE NOT NULL), <($0, CAST(1):DOUBLE NOT NULL))]) - LogicalProject(A=[RAND()]) - LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +LogicalFilter(condition=[>($1, 0)]) + LogicalProject(r=[RAND()], b=[$7]) + LogicalTableScan(table=[[scott, EMP]]) +]]> + + + ($7, 0)]) + LogicalTableScan(table=[[scott, EMP]]) ]]> From 51e6c7a137927ef660c7c700c7182b2a1ec89579 Mon Sep 17 00:00:00 2001 From: Darpan Date: Thu, 28 May 2026 11:31:08 +0530 Subject: [PATCH 3/4] [CALCITE-7551] Only block join/semi-join project transpose when the condition references a non-deterministic projected column --- .../rel/rules/JoinProjectTransposeRule.java | 34 ++++++++++++--- .../rules/SemiJoinProjectTransposeRule.java | 22 ++++++---- .../apache/calcite/test/RelOptRulesTest.java | 41 ++++++++++++++++++- .../apache/calcite/test/RelOptRulesTest.xml | 36 ++++++++++++++++ 4 files changed, 119 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java 
b/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java index c980269bdb44..5a4fc4c32713 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java @@ -40,6 +40,7 @@ import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import org.checkerframework.checker.nullness.qual.Nullable; @@ -113,6 +114,22 @@ public JoinProjectTransposeRule(RelOptRuleOperand operand, //~ Methods ---------------------------------------------------------------- + /** Returns whether {@code conditionRefs} (input references of the join + * condition, expressed against the join's combined output) references a + * non-deterministic expression of {@code project}, whose first output + * field is at {@code offset} in that combined output. */ + private static boolean referencesNonDeterministic(Project project, + ImmutableBitSet conditionRefs, int offset) { + final List exprs = project.getProjects(); + for (int i = 0; i < exprs.size(); i++) { + if (conditionRefs.get(offset + i) + && !RexUtil.isDeterministic(exprs.get(i))) { + return true; + } + } + return false; + } + @Override public void onMatch(RelOptRuleCall call) { final Join join = call.rel(0); final JoinRelType joinType = join.getJoinType(); @@ -152,17 +169,22 @@ public JoinProjectTransposeRule(RelOptRuleOperand operand, rightJoinChild = join.getRight(); } - // Skip projects that contain non-deterministic expressions - // (e.g. RAND). The merge below inlines projected expressions - // into the join condition via expandLocalRef, which would - // duplicate every non-deterministic call referenced more than once. + // Skip a project when the join condition references one of its + // non-deterministic expressions (e.g. RAND()). 
The merge below inlines + // that expression into the new join condition via expandLocalRef while + // the project still re-emits it above, splitting one evaluation into + // two. Non-deterministic columns that the condition does not reference + // are safe to pull up. + final ImmutableBitSet conditionRefs = + RelOptUtil.InputFinder.bits(join.getCondition()); + final int nLeftFields = join.getLeft().getRowType().getFieldCount(); if (leftProject != null - && !leftProject.getProjects().stream().allMatch(RexUtil::isDeterministic)) { + && referencesNonDeterministic(leftProject, conditionRefs, 0)) { leftProject = null; leftJoinChild = join.getLeft(); } if (rightProject != null - && !rightProject.getProjects().stream().allMatch(RexUtil::isDeterministic)) { + && referencesNonDeterministic(rightProject, conditionRefs, nLeftFields)) { rightProject = null; rightJoinChild = join.getRight(); } diff --git a/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java b/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java index 8e341b36b013..e5be603d9e40 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java @@ -17,6 +17,7 @@ package org.apache.calcite.rel.rules; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; @@ -34,6 +35,7 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import com.google.common.collect.ImmutableList; @@ -73,13 +75,19 @@ protected SemiJoinProjectTransposeRule(Config config) { final Join semiJoin = call.rel(0); final Project project = call.rel(1); - 
// Skip when the project contains a non-deterministic expression - // (e.g. RAND). Pulling such a project above the semi-join inlines - // its expressions into the join condition via expandLocalRef and - // then re-emits the projection above, splitting one evaluation - // into many. See [CALCITE-7551]. - if (!project.getProjects().stream().allMatch(RexUtil::isDeterministic)) { - return; + // Skip when the semi-join condition references one of the project's + // non-deterministic expressions (e.g. RAND()). Pulling such a project + // above the semi-join inlines that expression into the join condition + // via expandLocalRef while the project still re-emits it above, + // splitting one evaluation into two. Non-deterministic columns that the + // condition does not reference are safe to pull up. See [CALCITE-7551]. + final ImmutableBitSet conditionRefs = + RelOptUtil.InputFinder.bits(semiJoin.getCondition()); + final List projects = project.getProjects(); + for (int i = 0; i < projects.size(); i++) { + if (conditionRefs.get(i) && !RexUtil.isDeterministic(projects.get(i))) { + return; + } } // Convert the LHS semi-join keys to reference the child projection diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java index 4a89b831660a..980c976dda87 100644 --- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java @@ -1554,13 +1554,33 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) { relFn(relFn).withRule(CoreRules.JOIN_PROJECT_LEFT_TRANSPOSE).checkUnchanged(); } + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. 
The transpose is still allowed when + * the join condition only references deterministic projected columns, + * even if the project also computes a non-deterministic column (here + * {@code r} is RAND() but the join is on DEPTNO). */ + @Test void testJoinProjectTransposeWithUnrelatedNonDeterministic() { + final Function relFn = b -> b + .scan("EMP") + .project(b.alias(b.call(SqlStdOperatorTable.RAND), "r"), + b.field("DEPTNO")) + .scan("DEPT") + .join(JoinRelType.INNER, + b.equals(b.field(2, 0, "DEPTNO"), b.field(2, 1, "DEPTNO"))) + .build(); + relFn(relFn).withRule(CoreRules.JOIN_PROJECT_LEFT_TRANSPOSE).check(); + } + /** Test case for * [CALCITE-7551] * Project/Filter/Join transpose and merge rules can duplicate * non-deterministic expressions. SemiJoinProjectTransposeRule * uses the same {@code mergePrograms} + {@code expandLocalRef} * pattern as JoinProjectTransposeRule, and must not pull a project - * containing a non-deterministic expression above the semi-join. */ + * above the semi-join when the condition references one of its + * non-deterministic expressions. */ @Test void testSemiJoinProjectTransposeShouldIgnoreNonDeterministic() { final Function relFn = b -> b .scan("EMP") @@ -1575,6 +1595,25 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) { relFn(relFn).withRule(CoreRules.SEMI_JOIN_PROJECT_TRANSPOSE).checkUnchanged(); } + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. The semi-join transpose is still + * allowed when the condition only references deterministic projected + * columns, even if the project also computes a non-deterministic column + * (here {@code r} is RAND() but the semi-join is on DEPTNO). 
*/ + @Test void testSemiJoinProjectTransposeWithUnrelatedNonDeterministic() { + final Function relFn = b -> b + .scan("EMP") + .project(b.alias(b.call(SqlStdOperatorTable.RAND), "r"), + b.field("DEPTNO")) + .scan("DEPT") + .join(JoinRelType.SEMI, + b.equals(b.field(2, 0, "DEPTNO"), b.field(2, 1, "DEPTNO"))) + .build(); + relFn(relFn).withRule(CoreRules.SEMI_JOIN_PROJECT_TRANSPOSE).check(); + } + /** Test case for * [CALCITE-1338] * JoinProjectTransposeRule should not pull a literal above the diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml index 5cbb81e22a35..4e9e1e68cd34 100644 --- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml +++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml @@ -8508,6 +8508,24 @@ LogicalProject(DEPTNO=[$0], NAME=[$1], R=[$3], EXPR$1=[$4]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) LogicalProject(R=[RANK() OVER (ORDER BY $1)], EXPR$1=[+(1, 1)]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) +]]> + + + + + + + + @@ -18028,6 +18046,24 @@ LogicalJoin(condition=[SEARCH($1, Sarg[(0.0E0:DOUBLE..1.0E0:DOUBLE)]:DOUBLE)], j LogicalProject(EMPNO=[$0], r=[RAND()]) LogicalTableScan(table=[[scott, EMP]]) LogicalTableScan(table=[[scott, DEPT]]) +]]> + + + + + + + + From 241bdab150286d11569efd04194139c60d8ce4d9 Mon Sep 17 00:00:00 2001 From: Darpan Date: Sat, 6 Jun 2026 12:16:47 +0530 Subject: [PATCH 4/4] [CALCITE-7551] Convert CALCITE-7551 transpose tests from RelBuilder to SQL --- .../apache/calcite/test/RelOptRulesTest.java | 90 ++++++----------- .../apache/calcite/test/RelOptRulesTest.xml | 98 +++++++++++++------ 2 files changed, 94 insertions(+), 94 deletions(-) diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java index 980c976dda87..0c5f707b5625 100644 --- 
a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java @@ -1541,17 +1541,9 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) { * the join, because it inlines the expression into the new join * condition via {@code mergedProgram.expandLocalRef}. */ @Test void testJoinProjectTransposeShouldIgnoreNonDeterministic() { - final Function relFn = b -> b - .scan("EMP") - .project(b.field("EMPNO"), - b.alias(b.call(SqlStdOperatorTable.RAND), "r")) - .scan("DEPT") - .join(JoinRelType.INNER, - b.and( - b.greaterThan(b.field(2, 0, "r"), b.literal(0.0)), - b.lessThan(b.field(2, 0, "r"), b.literal(1.0)))) - .build(); - relFn(relFn).withRule(CoreRules.JOIN_PROJECT_LEFT_TRANSPOSE).checkUnchanged(); + final String sql = "select * from (select empno, rand() as r from emp) e\n" + + "join dept d on e.r = d.deptno"; + sql(sql).withRule(CoreRules.JOIN_PROJECT_LEFT_TRANSPOSE).checkUnchanged(); } /** Test case for @@ -1562,15 +1554,9 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) { * even if the project also computes a non-deterministic column (here * {@code r} is RAND() but the join is on DEPTNO). 
*/ @Test void testJoinProjectTransposeWithUnrelatedNonDeterministic() { - final Function relFn = b -> b - .scan("EMP") - .project(b.alias(b.call(SqlStdOperatorTable.RAND), "r"), - b.field("DEPTNO")) - .scan("DEPT") - .join(JoinRelType.INNER, - b.equals(b.field(2, 0, "DEPTNO"), b.field(2, 1, "DEPTNO"))) - .build(); - relFn(relFn).withRule(CoreRules.JOIN_PROJECT_LEFT_TRANSPOSE).check(); + final String sql = "select * from (select rand() as r, deptno from emp) e\n" + + "join dept d on e.deptno = d.deptno"; + sql(sql).withRule(CoreRules.JOIN_PROJECT_LEFT_TRANSPOSE).check(); } /** Test case for @@ -1582,17 +1568,14 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) { * above the semi-join when the condition references one of its * non-deterministic expressions. */ @Test void testSemiJoinProjectTransposeShouldIgnoreNonDeterministic() { - final Function relFn = b -> b - .scan("EMP") - .project(b.field("EMPNO"), - b.alias(b.call(SqlStdOperatorTable.RAND), "r")) - .scan("DEPT") - .join(JoinRelType.SEMI, - b.and( - b.greaterThan(b.field(2, 0, "r"), b.literal(0.0)), - b.lessThan(b.field(2, 0, "r"), b.literal(1.0)))) - .build(); - relFn(relFn).withRule(CoreRules.SEMI_JOIN_PROJECT_TRANSPOSE).checkUnchanged(); + final String sql = "select * from (select empno, rand() as r from emp) e\n" + + "where e.r in (select sal from emp)"; + sql(sql) + .withDecorrelate(false) + .withExpand(true) + .withPreRule(CoreRules.PROJECT_TO_SEMI_JOIN) + .withRule(CoreRules.SEMI_JOIN_PROJECT_TRANSPOSE) + .checkUnchanged(); } /** Test case for @@ -1603,15 +1586,14 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) { * columns, even if the project also computes a non-deterministic column * (here {@code r} is RAND() but the semi-join is on DEPTNO). 
*/ @Test void testSemiJoinProjectTransposeWithUnrelatedNonDeterministic() { - final Function relFn = b -> b - .scan("EMP") - .project(b.alias(b.call(SqlStdOperatorTable.RAND), "r"), - b.field("DEPTNO")) - .scan("DEPT") - .join(JoinRelType.SEMI, - b.equals(b.field(2, 0, "DEPTNO"), b.field(2, 1, "DEPTNO"))) - .build(); - relFn(relFn).withRule(CoreRules.SEMI_JOIN_PROJECT_TRANSPOSE).check(); + final String sql = "select * from (select rand() as r, deptno from emp) e\n" + + "where e.deptno in (select deptno from dept)"; + sql(sql) + .withDecorrelate(false) + .withExpand(true) + .withPreRule(CoreRules.PROJECT_TO_SEMI_JOIN) + .withRule(CoreRules.SEMI_JOIN_PROJECT_TRANSPOSE) + .check(); } /** Test case for @@ -3292,19 +3274,9 @@ private void checkProjectCorrelateTransposeRuleSemiOrAntiCorrelate(JoinRelType t * not pull a filter that references a non-deterministic projected * column below the project. */ @Test void testFilterProjectTransposeShouldIgnoreNonDeterministic() { - // Filter(a > 0 AND a < 1) over Project(a = RAND()). The filter - // references the non-deterministic column, so the transpose must be - // refused: otherwise the pushed-down filter and the re-emitted project - // would each evaluate RAND() independently. - final Function relFn = b -> b - .scan("EMP") - .project(b.alias(b.call(SqlStdOperatorTable.RAND), "a")) - .filter( - b.and( - b.greaterThan(b.field("a"), b.literal(0.0)), - b.lessThan(b.field("a"), b.literal(1.0)))) - .build(); - relFn(relFn).withRule(CoreRules.FILTER_PROJECT_TRANSPOSE).checkUnchanged(); + final String sql = "select * from (select rand() as a from emp)\n" + + "where a > 0 and a < 1"; + sql(sql).withRule(CoreRules.FILTER_PROJECT_TRANSPOSE).checkUnchanged(); } /** Test case for @@ -3315,15 +3287,9 @@ private void checkProjectCorrelateTransposeRuleSemiOrAntiCorrelate(JoinRelType t * other columns in the project are non-deterministic (here {@code r} is * RAND() but the filter is on {@code b}). 
*/ @Test void testFilterProjectTransposeWithUnrelatedNonDeterministic() { - // Filter(b > 0) over Project(r = RAND(), b = DEPTNO). The filter only - // references the deterministic column b, so the transpose is safe. - final Function relFn = b -> b - .scan("EMP") - .project(b.alias(b.call(SqlStdOperatorTable.RAND), "r"), - b.alias(b.field("DEPTNO"), "b")) - .filter(b.greaterThan(b.field("b"), b.literal(0))) - .build(); - relFn(relFn).withRule(CoreRules.FILTER_PROJECT_TRANSPOSE).check(); + final String sql = "select * from (select rand() as r, deptno as b from emp)\n" + + "where b > 0"; + sql(sql).withRule(CoreRules.FILTER_PROJECT_TRANSPOSE).check(); } private static final String NOT_STRONG_EXPR = diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml index 4e9e1e68cd34..56cb01846bd5 100644 --- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml +++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml @@ -5907,27 +5907,38 @@ LogicalAggregate(group=[{}], EXPR$0=[COUNT()]) + + 0 and a < 1]]> + ($0, CAST(0):DOUBLE NOT NULL), <($0, CAST(1):DOUBLE NOT NULL))]) + LogicalProject(A=[RAND()]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) ]]> + + 0]]> + ($1, 0)]) - LogicalProject(r=[RAND()], b=[$7]) - LogicalTableScan(table=[[scott, EMP]]) +LogicalProject(R=[$0], B=[$1]) + LogicalFilter(condition=[>($1, 0)]) + LogicalProject(R=[RAND()], B=[$7]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) ]]> ($7, 0)]) - LogicalTableScan(table=[[scott, EMP]]) +LogicalProject(R=[$0], B=[$1]) + LogicalProject(R=[RAND()], B=[$7]) + LogicalFilter(condition=[>($7, 0)]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) ]]> @@ -8474,12 +8485,18 @@ LogicalProject(DEPTNO=[$0], NAME=[$1], NAME0=[$2], EXPR$1=[$3]) + + + @@ -8512,20 +8529,26 @@ LogicalProject(DEPTNO=[$0], NAME=[$1], R=[$3], EXPR$1=[$4]) + + + @@ -18040,30 +18063,41 @@ LogicalProject(DNAME=[$1]) + + + 
+ + +