OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
OuterJoinOptViaNullRejectionRule.java
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.apache.calcite.rel.rules;
18 
19 import org.apache.calcite.plan.RelOptRuleCall;
20 import org.apache.calcite.plan.hep.HepRelVertex;
21 import org.apache.calcite.rel.RelNode;
22 import org.apache.calcite.rel.core.Join;
23 import org.apache.calcite.rel.core.JoinRelType;
24 import org.apache.calcite.rel.logical.LogicalFilter;
25 import org.apache.calcite.rel.logical.LogicalJoin;
26 import org.apache.calcite.rel.logical.LogicalProject;
27 import org.apache.calcite.rel.logical.LogicalTableScan;
28 import org.apache.calcite.rex.RexCall;
29 import org.apache.calcite.rex.RexInputRef;
30 import org.apache.calcite.rex.RexLiteral;
31 import org.apache.calcite.rex.RexNode;
32 import org.apache.calcite.sql.SqlKind;
33 import org.apache.calcite.tools.RelBuilder;
34 import org.apache.calcite.tools.RelBuilderFactory;
35 import org.apache.calcite.util.mapping.Mappings;
36 import org.slf4j.Logger;
37 import org.slf4j.LoggerFactory;
38 
39 import java.util.ArrayList;
40 import java.util.HashMap;
41 import java.util.HashSet;
42 import java.util.List;
43 import java.util.Map;
44 import java.util.Set;
45 
47  // goal: relax full outer join to either left or inner joins
48  // consider two tables 'foo(a int, b int)' and 'bar(c int, d int)'
49  // foo = {(1,3), (2,4), (NULL, 5)} // bar = {(1,2), (4, 3), (NULL, 5)}
50 
51  // 1. full outer join -> left
52  // : select * from foo full outer join bar on a = c where a is not null;
53  // = select * from foo left outer join bar on a = c where a is not null;
54 
55  // 2. full outer join -> inner
56  // : select * from foo full outer join bar on a = c where a is not null and c is
57  // not null; = select * from foo join bar on a = c; (or select * from foo, bar
58  // where a = c;)
59 
60  // 3. left outer join --> inner
61  // : select * from foo left outer join bar on a = c where c is not null;
62  // = select * from foo join bar on a = c; (or select * from foo, bar where a = c;)
63 
64  // null rejection: "col IS NOT NULL" or "col > NULL_INDICATOR" in WHERE clause
65  // i.e., col > 1 must reject any tuples having null value in a col column
66 
67  // todo(yoonmin): runtime query optimization via statistic
68  // in fact, we can optimize more broad range of the query having outer joins
69  // by using filter predicates on join tables (but not on join cols)
70  // because such filter conditions could affect join tables and
71  // they can make join cols to be null rejected
72 
73  public static Set<String> visitedJoinMemo = new HashSet<>();
74  final static Logger HEAVYDBLOGGER =
75  LoggerFactory.getLogger(OuterJoinOptViaNullRejectionRule.class);
76 
77  public OuterJoinOptViaNullRejectionRule(RelBuilderFactory relBuilderFactory) {
78  super(operand(RelNode.class, operand(Join.class, null, any())),
79  relBuilderFactory,
80  "OuterJoinOptViaNullRejectionRule");
81  clearMemo();
82  }
83 
84  void clearMemo() {
85  visitedJoinMemo.clear();
86  }
87 
88  @Override
89  public void onMatch(RelOptRuleCall call) {
90  RelNode parentNode = call.rel(0);
91  LogicalJoin join = (LogicalJoin) call.rel(1);
92  String condString = join.getCondition().toString();
93  if (visitedJoinMemo.contains(condString)) {
94  return;
95  } else {
96  visitedJoinMemo.add(condString);
97  }
98  if (!(join.getCondition() instanceof RexCall)) {
99  return; // an inner join
100  }
101  if (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.SEMI
102  || join.getJoinType() == JoinRelType.ANTI) {
103  return; // non target
104  }
105  RelNode joinLeftChild = ((HepRelVertex) join.getLeft()).getCurrentRel();
106  RelNode joinRightChild = ((HepRelVertex) join.getRight()).getCurrentRel();
107  if (joinLeftChild instanceof LogicalProject) {
108  return; // disable this opt when LHS has subquery (i.e., filter push-down)
109  }
110  if (!(joinRightChild instanceof LogicalTableScan)) {
111  return; // disable this opt when RHS has subquery (i.e., filter push-down)
112  }
113  // an outer join contains its join cond in itself,
114  // not in a filter as typical inner join op. does
115  RexCall joinCond = (RexCall) join.getCondition();
116  Set<Integer> leftJoinCols = new HashSet<>();
117  Set<Integer> rightJoinCols = new HashSet<>();
118  Map<Integer, String> leftJoinColToColNameMap = new HashMap<>();
119  Map<Integer, String> rightJoinColToColNameMap = new HashMap<>();
120  Set<Integer> originalLeftJoinCols = new HashSet<>();
121  Set<Integer> originalRightJoinCols = new HashSet<>();
122  Map<Integer, String> originalLeftJoinColToColNameMap = new HashMap<>();
123  Map<Integer, String> originalRightJoinColToColNameMap = new HashMap<>();
124  List<RexCall> capturedFilterPredFromJoin = new ArrayList<>();
125  if (joinCond.getKind() == SqlKind.EQUALS) {
126  addJoinCols(joinCond,
127  join,
128  leftJoinCols,
129  rightJoinCols,
130  leftJoinColToColNameMap,
131  rightJoinColToColNameMap,
132  originalLeftJoinCols,
133  originalRightJoinCols,
134  originalLeftJoinColToColNameMap,
135  originalRightJoinColToColNameMap);
136  // we only consider ANDED exprs
137  } else if (joinCond.getKind() == SqlKind.AND) {
138  for (RexNode n : joinCond.getOperands()) {
139  if (n instanceof RexCall) {
140  RexCall op = (RexCall) n;
141  if (op.getOperands().size() > 2
142  && op.getOperands().get(1) instanceof RexLiteral) {
143  // try to capture literal comparison of join column located in the cur join
144  // node
145  capturedFilterPredFromJoin.add(op);
146  continue;
147  }
148  addJoinCols(op,
149  join,
150  leftJoinCols,
151  rightJoinCols,
152  leftJoinColToColNameMap,
153  rightJoinColToColNameMap,
154  originalLeftJoinCols,
155  originalRightJoinCols,
156  originalLeftJoinColToColNameMap,
157  originalRightJoinColToColNameMap);
158  }
159  }
160  }
161 
162  if (leftJoinCols.isEmpty() || rightJoinCols.isEmpty()) {
163  return;
164  }
165 
166  // find filter node(s)
167  RelNode root = call.getPlanner().getRoot();
168  List<LogicalFilter> collectedFilterNodes = new ArrayList<>();
169  RelNode curNode = root;
170  final RelBuilder relBuilder = call.builder();
171  // collect filter nodes
172  collectFilterCondition(curNode, collectedFilterNodes);
173  if (collectedFilterNodes.isEmpty()) {
174  // we have a last chance to take a look at this join condition itself
175  // i.e., the filter preds lay with the join conditions in the same join node
176  // but for now we disable the optimization to avoid unexpected plan issue
177  return;
178  }
179 
180  // check whether join column has filter predicate(s)
181  // and collect join column info used in target join nodes to be translated
182  Set<Integer> nullRejectedLeftJoinCols = new HashSet<>();
183  Set<Integer> nullRejectedRightJoinCols = new HashSet<>();
184  boolean hasExprsConnectedViaOR = false;
185  for (LogicalFilter filter : collectedFilterNodes) {
186  RexNode node = filter.getCondition();
187  if (node instanceof RexCall) {
188  RexCall curExpr = (RexCall) node;
189  // we only consider ANDED exprs
190  if (curExpr.getKind() == SqlKind.OR) {
191  hasExprsConnectedViaOR = true;
192  break;
193  }
194  if (curExpr.getKind() == SqlKind.AND) {
195  for (RexNode n : curExpr.getOperands()) {
196  if (n instanceof RexCall) {
197  RexCall c = (RexCall) n;
198  if (isCandidateFilterPred(c)
199  && c.getOperands().get(0) instanceof RexInputRef) {
200  RexInputRef col = (RexInputRef) c.getOperands().get(0);
201  int colId = col.getIndex();
202  boolean leftFilter = leftJoinCols.contains(colId);
203  boolean rightFilter = rightJoinCols.contains(colId);
204  if (leftFilter && rightFilter) {
205  // here we currently do not have a concrete column tracing logic
206  // so it may become a source of plan issue, so we disable this opt
207  return;
208  }
210  filter,
211  nullRejectedLeftJoinCols,
212  nullRejectedRightJoinCols,
213  leftJoinColToColNameMap,
214  rightJoinColToColNameMap);
215  }
216  }
217  }
218  } else {
219  if (curExpr instanceof RexCall) {
220  if (isCandidateFilterPred(curExpr)
221  && curExpr.getOperands().get(0) instanceof RexInputRef) {
222  RexInputRef col = (RexInputRef) curExpr.getOperands().get(0);
223  int colId = col.getIndex();
224  boolean leftFilter = leftJoinCols.contains(colId);
225  boolean rightFilter = rightJoinCols.contains(colId);
226  if (leftFilter && rightFilter) {
227  // here we currently do not have a concrete column tracing logic
228  // so it may become a source of plan issue, so we disable this opt
229  return;
230  }
231  addNullRejectedJoinCols(curExpr,
232  filter,
233  nullRejectedLeftJoinCols,
234  nullRejectedRightJoinCols,
235  leftJoinColToColNameMap,
236  rightJoinColToColNameMap);
237  }
238  }
239  }
240  }
241  }
242 
243  // we skip to optimize this query since analyzing complex filter exprs
244  // connected via OR condition is complex and risky
245  if (hasExprsConnectedViaOR) {
246  return;
247  }
248 
249  if (!capturedFilterPredFromJoin.isEmpty()) {
250  for (RexCall c : capturedFilterPredFromJoin) {
251  if (c.getOperands().get(0) instanceof RexInputRef) {
252  RexInputRef col = (RexInputRef) c.getOperands().get(0);
253  int colId = col.getIndex();
254  String colName = join.getRowType().getFieldNames().get(colId);
255  Boolean l = false;
256  Boolean r = false;
257  if (originalLeftJoinColToColNameMap.containsKey(colId)
258  && originalLeftJoinColToColNameMap.get(colId).equals(colName)) {
259  l = true;
260  }
261  if (originalRightJoinColToColNameMap.containsKey(colId)
262  && originalRightJoinColToColNameMap.get(colId).equals(colName)) {
263  r = true;
264  }
265  if (l && !r) {
266  nullRejectedLeftJoinCols.add(colId);
267  } else if (r && !l) {
268  nullRejectedRightJoinCols.add(colId);
269  } else if (r && l) {
270  return;
271  }
272  }
273  }
274  }
275 
276  Boolean leftNullRejected = false;
277  Boolean rightNullRejected = false;
278  if (!nullRejectedLeftJoinCols.isEmpty()
279  && leftJoinCols.equals(nullRejectedLeftJoinCols)) {
280  leftNullRejected = true;
281  }
282  if (!nullRejectedRightJoinCols.isEmpty()
283  && rightJoinCols.equals(nullRejectedRightJoinCols)) {
284  rightNullRejected = true;
285  }
286 
287  // relax outer join condition depending on null rejected cols
288  RelNode newJoinNode = null;
289  Boolean needTransform = false;
290  if (join.getJoinType() == JoinRelType.FULL) {
291  // 1) full -> left
292  if (leftNullRejected && !rightNullRejected) {
293  newJoinNode = join.copy(join.getTraitSet(),
294  join.getCondition(),
295  join.getLeft(),
296  join.getRight(),
297  JoinRelType.LEFT,
298  join.isSemiJoinDone());
299  needTransform = true;
300  }
301 
302  // 2) full -> inner
303  if (leftNullRejected && rightNullRejected) {
304  newJoinNode = join.copy(join.getTraitSet(),
305  join.getCondition(),
306  join.getLeft(),
307  join.getRight(),
308  JoinRelType.INNER,
309  join.isSemiJoinDone());
310  needTransform = true;
311  }
312  } else if (join.getJoinType() == JoinRelType.LEFT) {
313  // 3) left -> inner
314  if (rightNullRejected) {
315  newJoinNode = join.copy(join.getTraitSet(),
316  join.getCondition(),
317  join.getLeft(),
318  join.getRight(),
319  JoinRelType.INNER,
320  join.isSemiJoinDone());
321  needTransform = true;
322  }
323  }
324  if (needTransform) {
325  relBuilder.push(newJoinNode);
326  parentNode.replaceInput(0, newJoinNode);
327  call.transformTo(parentNode);
328  }
329  return;
330  }
331 
332  void addJoinCols(RexCall joinCond,
333  LogicalJoin joinOp,
334  Set<Integer> leftJoinCols,
335  Set<Integer> rightJoinCols,
336  Map<Integer, String> leftJoinColToColNameMap,
337  Map<Integer, String> rightJoinColToColNameMap,
338  Set<Integer> originalLeftJoinCols,
339  Set<Integer> originalRightJoinCols,
340  Map<Integer, String> originalLeftJoinColToColNameMap,
341  Map<Integer, String> originalRightJoinColToColNameMap) {
342  if (joinCond.getOperands().size() != 2
343  || !(joinCond.getOperands().get(0) instanceof RexInputRef)
344  || !(joinCond.getOperands().get(1) instanceof RexInputRef)) {
345  return;
346  }
347  RexInputRef leftJoinCol = (RexInputRef) joinCond.getOperands().get(0);
348  RexInputRef rightJoinCol = (RexInputRef) joinCond.getOperands().get(1);
349  originalLeftJoinCols.add(leftJoinCol.getIndex());
350  originalRightJoinCols.add(rightJoinCol.getIndex());
351  originalLeftJoinColToColNameMap.put(leftJoinCol.getIndex(),
352  joinOp.getRowType().getFieldNames().get(leftJoinCol.getIndex()));
353  originalRightJoinColToColNameMap.put(rightJoinCol.getIndex(),
354  joinOp.getRowType().getFieldNames().get(rightJoinCol.getIndex()));
355  if (leftJoinCol.getIndex() > rightJoinCol.getIndex()) {
356  leftJoinCol = (RexInputRef) joinCond.getOperands().get(1);
357  rightJoinCol = (RexInputRef) joinCond.getOperands().get(0);
358  }
359  int originalLeftColOffset = traceColOffset(joinOp.getLeft(), leftJoinCol, 0);
360  int originalRightColOffset = traceColOffset(joinOp.getRight(),
361  rightJoinCol,
362  joinOp.getLeft().getRowType().getFieldCount());
363  if (originalLeftColOffset != -1) {
364  return;
365  }
366  int leftColOffset =
367  originalLeftColOffset == -1 ? leftJoinCol.getIndex() : originalLeftColOffset;
368  int rightColOffset = originalRightColOffset == -1 ? rightJoinCol.getIndex()
369  : originalRightColOffset;
370  String leftJoinColName = joinOp.getRowType().getFieldNames().get(leftColOffset);
371  String rightJoinColName =
372  joinOp.getRowType().getFieldNames().get(rightJoinCol.getIndex());
373  leftJoinCols.add(leftColOffset);
374  rightJoinCols.add(rightColOffset);
375  leftJoinColToColNameMap.put(leftColOffset, leftJoinColName);
376  rightJoinColToColNameMap.put(rightColOffset, rightJoinColName);
377  return;
378  }
379 
380  void addNullRejectedJoinCols(RexCall call,
381  LogicalFilter targetFilter,
382  Set<Integer> nullRejectedLeftJoinCols,
383  Set<Integer> nullRejectedRightJoinCols,
384  Map<Integer, String> leftJoinColToColNameMap,
385  Map<Integer, String> rightJoinColToColNameMap) {
386  if (isCandidateFilterPred(call) && call.getOperands().get(0) instanceof RexInputRef) {
387  RexInputRef col = (RexInputRef) call.getOperands().get(0);
388  int colId = col.getIndex();
389  String colName = targetFilter.getRowType().getFieldNames().get(colId);
390  Boolean l = false;
391  Boolean r = false;
392  if (leftJoinColToColNameMap.containsKey(colId)
393  && leftJoinColToColNameMap.get(colId).equals(colName)) {
394  l = true;
395  }
396  if (rightJoinColToColNameMap.containsKey(colId)
397  && rightJoinColToColNameMap.get(colId).equals(colName)) {
398  r = true;
399  }
400  if (l && !r) {
401  nullRejectedLeftJoinCols.add(colId);
402  return;
403  }
404  if (r && !l) {
405  nullRejectedRightJoinCols.add(colId);
406  return;
407  }
408  }
409  }
410 
411  void collectFilterCondition(RelNode curNode, List<LogicalFilter> collectedFilterNodes) {
412  if (curNode instanceof HepRelVertex) {
413  curNode = ((HepRelVertex) curNode).getCurrentRel();
414  }
415  if (curNode instanceof LogicalFilter) {
416  collectedFilterNodes.add((LogicalFilter) curNode);
417  }
418  if (curNode.getInputs().size() == 0) {
419  // end of the query plan, move out
420  return;
421  }
422  for (int i = 0; i < curNode.getInputs().size(); i++) {
423  collectFilterCondition(curNode.getInput(i), collectedFilterNodes);
424  }
425  }
426 
427  void collectProjectNode(RelNode curNode, List<LogicalProject> collectedProject) {
428  if (curNode instanceof HepRelVertex) {
429  curNode = ((HepRelVertex) curNode).getCurrentRel();
430  }
431  if (curNode instanceof LogicalProject) {
432  collectedProject.add((LogicalProject) curNode);
433  }
434  if (curNode.getInputs().size() == 0) {
435  // end of the query plan, move out
436  return;
437  }
438  for (int i = 0; i < curNode.getInputs().size(); i++) {
439  collectProjectNode(curNode.getInput(i), collectedProject);
440  }
441  }
442 
443  int traceColOffset(RelNode curNode, RexInputRef colRef, int startOffset) {
444  int colOffset = -1;
445  ArrayList<LogicalProject> collectedProjectNodes = new ArrayList<>();
446  collectProjectNode(curNode, collectedProjectNodes);
447  // the nearest project node that may permute the column offset
448  if (!collectedProjectNodes.isEmpty()) {
449  // get the closest project node from the cur join node's target child
450  LogicalProject projectNode = collectedProjectNodes.get(0);
451  Mappings.TargetMapping targetMapping = projectNode.getMapping();
452  if (null != colRef && null != targetMapping) {
453  // try to track the original col offset
454  int base_offset = colRef.getIndex() - startOffset;
455 
456  if (base_offset >= 0 && base_offset < targetMapping.getSourceCount()) {
457  colOffset = targetMapping.getSourceOpt(base_offset);
458  }
459  }
460  }
461  return colOffset;
462  }
463 
464  boolean isComparisonOp(RexCall c) {
465  SqlKind opKind = c.getKind();
466  return (SqlKind.BINARY_COMPARISON.contains(opKind)
467  || SqlKind.BINARY_EQUALITY.contains(opKind));
468  }
469 
470  boolean isNotNullFilter(RexCall c) {
471  return (c.op.kind == SqlKind.IS_NOT_NULL && c.operands.size() == 1);
472  }
473 
474  boolean isCandidateFilterPred(RexCall c) {
475  return (isNotNullFilter(c)
476  || (c.operands.size() == 2 && isComparisonOp(c)
477  && c.operands.get(0) instanceof RexInputRef
478  && c.operands.get(1) instanceof RexLiteral));
479  }
480 }
void collectFilterCondition(RelNode curNode, List< LogicalFilter > collectedFilterNodes)
void addNullRejectedJoinCols(RexCall call, LogicalFilter targetFilter, Set< Integer > nullRejectedLeftJoinCols, Set< Integer > nullRejectedRightJoinCols, Map< Integer, String > leftJoinColToColNameMap, Map< Integer, String > rightJoinColToColNameMap)
std::string join(T const &container, std::string const &delim)
tuple root
Definition: setup.in.py:14
void addJoinCols(RexCall joinCond, LogicalJoin joinOp, Set< Integer > leftJoinCols, Set< Integer > rightJoinCols, Map< Integer, String > leftJoinColToColNameMap, Map< Integer, String > rightJoinColToColNameMap, Set< Integer > originalLeftJoinCols, Set< Integer > originalRightJoinCols, Map< Integer, String > originalLeftJoinColToColNameMap, Map< Integer, String > originalRightJoinColToColNameMap)
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
int traceColOffset(RelNode curNode, RexInputRef colRef, int startOffset)
void collectProjectNode(RelNode curNode, List< LogicalProject > collectedProject)
constexpr double n
Definition: Utm.h:38