├── 01-simple-query-optimizer ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── querifylabs │ │ └── blog │ │ └── optimizer │ │ ├── Optimizer.java │ │ ├── SimpleSchema.java │ │ ├── SimpleTable.java │ │ └── SimpleTableStatistic.java │ └── test │ └── java │ └── com │ └── querifylabs │ └── blog │ └── optimizer │ └── OptimizerTest.java ├── 02-custom-calcite-trait ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── querifylabs │ │ └── blog │ │ └── trait │ │ ├── Distribution.java │ │ ├── DistributionTraitDef.java │ │ ├── ExchangeRel.java │ │ ├── Schema.java │ │ └── Table.java │ └── test │ └── java │ └── com │ └── querifylabs │ └── blog │ └── trait │ └── TraitTest.java ├── README.md └── join-enumerator ├── pom.xml └── src ├── main └── java │ └── com │ └── querifylabs │ └── blog │ └── joins │ ├── JoinEnumerator.java │ └── JoinEnumeratorUtils.java └── test └── java └── com └── querifylabs └── blog └── joins ├── JoinEnumeratorTcpdsTest.java └── JoinEnumeratorTest.java /01-simple-query-optimizer/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | 7 | Simple Query Optimizer 8 | com.querifylabs.blog 9 | simple-query-optimizer 10 | 1.0-SNAPSHOT 11 | jar 12 | https://www.querifylabs.com 13 | 14 | 15 | UTF-8 16 | 1.8 17 | 1.8 18 | 1.25.0 19 | 30.0-jre 20 | 4.11 21 | 22 | 23 | 24 | 25 | org.apache.calcite 26 | calcite-core 27 | ${calcite.version} 28 | 29 | 30 | 31 | com.google.guava 32 | guava 33 | ${guava.version} 34 | 35 | 36 | 37 | junit 38 | junit 39 | ${junit.version} 40 | test 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | maven-compiler-plugin 49 | 3.8.0 50 | 51 | 52 | maven-surefire-plugin 53 | 2.22.1 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /01-simple-query-optimizer/src/main/java/com/querifylabs/blog/optimizer/Optimizer.java: -------------------------------------------------------------------------------- 1 | package 
com.querifylabs.blog.optimizer; 2 | 3 | import org.apache.calcite.avatica.util.Casing; 4 | import org.apache.calcite.config.CalciteConnectionConfig; 5 | import org.apache.calcite.config.CalciteConnectionConfigImpl; 6 | import org.apache.calcite.config.CalciteConnectionProperty; 7 | import org.apache.calcite.jdbc.CalciteSchema; 8 | import org.apache.calcite.jdbc.JavaTypeFactoryImpl; 9 | import org.apache.calcite.plan.Contexts; 10 | import org.apache.calcite.plan.ConventionTraitDef; 11 | import org.apache.calcite.plan.RelOptCluster; 12 | import org.apache.calcite.plan.RelOptCostImpl; 13 | import org.apache.calcite.plan.RelTraitSet; 14 | import org.apache.calcite.plan.volcano.VolcanoPlanner; 15 | import org.apache.calcite.prepare.CalciteCatalogReader; 16 | import org.apache.calcite.prepare.Prepare; 17 | import org.apache.calcite.rel.RelNode; 18 | import org.apache.calcite.rel.RelRoot; 19 | import org.apache.calcite.rel.type.RelDataTypeFactory; 20 | import org.apache.calcite.rex.RexBuilder; 21 | import org.apache.calcite.sql.SqlNode; 22 | import org.apache.calcite.sql.SqlOperatorTable; 23 | import org.apache.calcite.sql.fun.SqlStdOperatorTable; 24 | import org.apache.calcite.sql.parser.SqlParser; 25 | import org.apache.calcite.sql.util.ChainedSqlOperatorTable; 26 | import org.apache.calcite.sql.validate.SqlValidator; 27 | import org.apache.calcite.sql.validate.SqlValidatorUtil; 28 | import org.apache.calcite.sql2rel.SqlToRelConverter; 29 | import org.apache.calcite.sql2rel.StandardConvertletTable; 30 | import org.apache.calcite.tools.Program; 31 | import org.apache.calcite.tools.Programs; 32 | import org.apache.calcite.tools.RuleSet; 33 | import org.apache.calcite.tools.RuleSets; 34 | 35 | import java.util.Collections; 36 | import java.util.Properties; 37 | 38 | public class Optimizer { 39 | 40 | private final CalciteConnectionConfig config; 41 | private final SqlValidator validator; 42 | private final SqlToRelConverter converter; 43 | private final VolcanoPlanner 
planner; 44 | 45 | public Optimizer( 46 | CalciteConnectionConfig config, 47 | SqlValidator validator, 48 | SqlToRelConverter converter, 49 | VolcanoPlanner planner 50 | ) { 51 | this.config = config; 52 | this.validator = validator; 53 | this.converter = converter; 54 | this.planner = planner; 55 | } 56 | 57 | public static Optimizer create(SimpleSchema schema) { 58 | RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl(); 59 | 60 | Properties configProperties = new Properties(); 61 | configProperties.put(CalciteConnectionProperty.CASE_SENSITIVE.camelName(), Boolean.TRUE.toString()); 62 | configProperties.put(CalciteConnectionProperty.UNQUOTED_CASING.camelName(), Casing.UNCHANGED.toString()); 63 | configProperties.put(CalciteConnectionProperty.QUOTED_CASING.camelName(), Casing.UNCHANGED.toString()); 64 | CalciteConnectionConfig config = new CalciteConnectionConfigImpl(configProperties); 65 | 66 | CalciteSchema rootSchema = CalciteSchema.createRootSchema(false, false); 67 | rootSchema.add(schema.getSchemaName(), schema); 68 | Prepare.CatalogReader catalogReader = new CalciteCatalogReader( 69 | rootSchema, 70 | Collections.singletonList(schema.getSchemaName()), 71 | typeFactory, 72 | config 73 | ); 74 | 75 | SqlOperatorTable operatorTable = ChainedSqlOperatorTable.of(SqlStdOperatorTable.instance()); 76 | 77 | SqlValidator.Config validatorConfig = SqlValidator.Config.DEFAULT 78 | .withLenientOperatorLookup(config.lenientOperatorLookup()) 79 | .withSqlConformance(config.conformance()) 80 | .withDefaultNullCollation(config.defaultNullCollation()) 81 | .withIdentifierExpansion(true); 82 | 83 | SqlValidator validator = SqlValidatorUtil.newValidator(operatorTable, catalogReader, typeFactory, validatorConfig); 84 | 85 | VolcanoPlanner planner = new VolcanoPlanner(RelOptCostImpl.FACTORY, Contexts.of(config)); 86 | planner.addRelTraitDef(ConventionTraitDef.INSTANCE); 87 | 88 | RelOptCluster cluster = RelOptCluster.create(planner, new RexBuilder(typeFactory)); 89 | 90 | 
SqlToRelConverter.Config converterConfig = SqlToRelConverter.configBuilder() 91 | .withTrimUnusedFields(true) 92 | .withExpand(false) // https://issues.apache.org/jira/browse/CALCITE-1045 93 | .build(); 94 | 95 | SqlToRelConverter converter = new SqlToRelConverter( 96 | null, 97 | validator, 98 | catalogReader, 99 | cluster, 100 | StandardConvertletTable.INSTANCE, 101 | converterConfig 102 | ); 103 | 104 | return new Optimizer(config, validator, converter, planner); 105 | } 106 | 107 | public SqlNode parse(String sql) throws Exception { 108 | SqlParser.ConfigBuilder parserConfig = SqlParser.configBuilder(); 109 | parserConfig.setCaseSensitive(config.caseSensitive()); 110 | parserConfig.setUnquotedCasing(config.unquotedCasing()); 111 | parserConfig.setQuotedCasing(config.quotedCasing()); 112 | parserConfig.setConformance(config.conformance()); 113 | 114 | SqlParser parser = SqlParser.create(sql, parserConfig.build()); 115 | 116 | return parser.parseStmt(); 117 | } 118 | 119 | public SqlNode validate(SqlNode node) { 120 | return validator.validate(node); 121 | } 122 | 123 | public RelNode convert(SqlNode node) { 124 | RelRoot root = converter.convertQuery(node, false, true); 125 | 126 | return root.rel; 127 | } 128 | 129 | public RelNode optimize(RelNode node, RelTraitSet requiredTraitSet, RuleSet rules) { 130 | Program program = Programs.of(RuleSets.ofList(rules)); 131 | 132 | return program.run( 133 | planner, 134 | node, 135 | requiredTraitSet, 136 | Collections.emptyList(), 137 | Collections.emptyList() 138 | ); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /01-simple-query-optimizer/src/main/java/com/querifylabs/blog/optimizer/SimpleSchema.java: -------------------------------------------------------------------------------- 1 | package com.querifylabs.blog.optimizer; 2 | 3 | import org.apache.calcite.schema.Schema; 4 | import org.apache.calcite.schema.SchemaVersion; 5 | import 
org.apache.calcite.schema.Table; 6 | import org.apache.calcite.schema.impl.AbstractSchema; 7 | 8 | import java.util.HashMap; 9 | import java.util.Map; 10 | 11 | public class SimpleSchema extends AbstractSchema { 12 | 13 | private final String schemaName; 14 | private final Map tableMap; 15 | 16 | private SimpleSchema(String schemaName, Map tableMap) { 17 | this.schemaName = schemaName; 18 | this.tableMap = tableMap; 19 | } 20 | 21 | public String getSchemaName() { 22 | return schemaName; 23 | } 24 | 25 | @Override 26 | public Map getTableMap() { 27 | return tableMap; 28 | } 29 | 30 | @Override 31 | public Schema snapshot(SchemaVersion version) { 32 | return this; 33 | } 34 | 35 | public static Builder newBuilder(String schemaName) { 36 | return new Builder(schemaName); 37 | } 38 | 39 | public static final class Builder { 40 | 41 | private final String schemaName; 42 | private final Map tableMap = new HashMap<>(); 43 | 44 | private Builder(String schemaName) { 45 | if (schemaName == null || schemaName.isEmpty()) { 46 | throw new IllegalArgumentException("Schema name cannot be null or empty"); 47 | } 48 | 49 | this.schemaName = schemaName; 50 | } 51 | 52 | public Builder addTable(SimpleTable table) { 53 | if (tableMap.containsKey(table.getTableName())) { 54 | throw new IllegalArgumentException("Table already defined: " + table.getTableName()); 55 | } 56 | 57 | tableMap.put(table.getTableName(), table); 58 | 59 | return this; 60 | } 61 | 62 | public SimpleSchema build() { 63 | /* Copy the map so that later addTable() calls on this builder cannot alter the schema returned here. */ return new SimpleSchema(schemaName, new HashMap<>(tableMap)); 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /01-simple-query-optimizer/src/main/java/com/querifylabs/blog/optimizer/SimpleTable.java: -------------------------------------------------------------------------------- 1 | package com.querifylabs.blog.optimizer; 2 | 3 | import org.apache.calcite.DataContext; 4 | import org.apache.calcite.linq4j.Enumerable; 5 | import 
org.apache.calcite.rel.type.RelDataType; 6 | import org.apache.calcite.rel.type.RelDataTypeFactory; 7 | import org.apache.calcite.rel.type.RelDataTypeField; 8 | import org.apache.calcite.rel.type.RelDataTypeFieldImpl; 9 | import org.apache.calcite.rel.type.RelRecordType; 10 | import org.apache.calcite.rel.type.StructKind; 11 | import org.apache.calcite.schema.ScannableTable; 12 | import org.apache.calcite.schema.Statistic; 13 | import org.apache.calcite.schema.impl.AbstractTable; 14 | import org.apache.calcite.sql.type.SqlTypeName; 15 | 16 | import java.util.ArrayList; 17 | import java.util.List; 18 | 19 | public class SimpleTable extends AbstractTable implements ScannableTable { 20 | 21 | private final String tableName; 22 | private final List fieldNames; 23 | private final List fieldTypes; 24 | private final SimpleTableStatistic statistic; 25 | 26 | private RelDataType rowType; 27 | 28 | private SimpleTable(String tableName, List fieldNames, List fieldTypes, SimpleTableStatistic statistic) { 29 | this.tableName = tableName; 30 | this.fieldNames = fieldNames; 31 | this.fieldTypes = fieldTypes; 32 | this.statistic = statistic; 33 | } 34 | 35 | public String getTableName() { 36 | return tableName; 37 | } 38 | 39 | @Override 40 | public RelDataType getRowType(RelDataTypeFactory typeFactory) { 41 | if (rowType == null) { 42 | List fields = new ArrayList<>(fieldNames.size()); 43 | 44 | for (int i = 0; i < fieldNames.size(); i++) { 45 | RelDataType fieldType = typeFactory.createSqlType(fieldTypes.get(i)); 46 | RelDataTypeField field = new RelDataTypeFieldImpl(fieldNames.get(i), i, fieldType); 47 | fields.add(field); 48 | } 49 | 50 | rowType = new RelRecordType(StructKind.PEEK_FIELDS, fields, false); 51 | } 52 | 53 | return rowType; 54 | } 55 | 56 | @Override 57 | public Statistic getStatistic() { 58 | return statistic; 59 | } 60 | 61 | @Override 62 | public Enumerable scan(DataContext root) { 63 | throw new UnsupportedOperationException("Not implemented"); 64 | } 65 | 
66 | public static Builder newBuilder(String tableName) { 67 | return new Builder(tableName); 68 | } 69 | 70 | public static final class Builder { 71 | 72 | private final String tableName; 73 | private final List fieldNames = new ArrayList<>(); 74 | private final List fieldTypes = new ArrayList<>(); 75 | private long rowCount; 76 | 77 | private Builder(String tableName) { 78 | if (tableName == null || tableName.isEmpty()) { 79 | throw new IllegalArgumentException("Table name cannot be null or empty"); 80 | } 81 | 82 | this.tableName = tableName; 83 | } 84 | 85 | public Builder addField(String name, SqlTypeName typeName) { 86 | if (name == null || name.isEmpty()) { 87 | throw new IllegalArgumentException("Field name cannot be null or empty"); 88 | } 89 | 90 | if (fieldNames.contains(name)) { 91 | throw new IllegalArgumentException("Field already defined: " + name); 92 | } 93 | 94 | fieldNames.add(name); 95 | fieldTypes.add(typeName); 96 | 97 | return this; 98 | } 99 | 100 | public Builder withRowCount(long rowCount) { 101 | this.rowCount = rowCount; 102 | 103 | return this; 104 | } 105 | 106 | public SimpleTable build() { 107 | if (fieldNames.isEmpty()) { 108 | throw new IllegalStateException("Table must have at least one field"); 109 | } 110 | 111 | /* Reject negative counts as well as the unset default of zero: the row count must be strictly positive. */ if (rowCount <= 0L) { 112 | throw new IllegalStateException("Table must have positive row count"); 113 | } 114 | 115 | return new SimpleTable(tableName, fieldNames, fieldTypes, new SimpleTableStatistic(rowCount)); 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /01-simple-query-optimizer/src/main/java/com/querifylabs/blog/optimizer/SimpleTableStatistic.java: -------------------------------------------------------------------------------- 1 | package com.querifylabs.blog.optimizer; 2 | 3 | import org.apache.calcite.rel.RelCollation; 4 | import org.apache.calcite.rel.RelDistribution; 5 | import org.apache.calcite.rel.RelDistributionTraitDef; 6 | import 
org.apache.calcite.rel.RelReferentialConstraint; 7 | import org.apache.calcite.schema.Statistic; 8 | import org.apache.calcite.util.ImmutableBitSet; 9 | 10 | import java.util.Collections; 11 | import java.util.List; 12 | 13 | public class SimpleTableStatistic implements Statistic { 14 | 15 | private final long rowCount; 16 | 17 | public SimpleTableStatistic(long rowCount) { 18 | this.rowCount = rowCount; 19 | } 20 | 21 | @Override 22 | public Double getRowCount() { 23 | return (double) rowCount; 24 | } 25 | 26 | @Override 27 | public boolean isKey(ImmutableBitSet columns) { 28 | return false; 29 | } 30 | 31 | @Override 32 | public List getKeys() { 33 | return Collections.emptyList(); 34 | } 35 | 36 | @Override 37 | public List getReferentialConstraints() { 38 | return Collections.emptyList(); 39 | } 40 | 41 | @Override 42 | public List getCollations() { 43 | return Collections.emptyList(); 44 | } 45 | 46 | @Override 47 | public RelDistribution getDistribution() { 48 | return RelDistributionTraitDef.INSTANCE.getDefault(); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /01-simple-query-optimizer/src/test/java/com/querifylabs/blog/optimizer/OptimizerTest.java: -------------------------------------------------------------------------------- 1 | package com.querifylabs.blog.optimizer; 2 | 3 | import org.apache.calcite.adapter.enumerable.EnumerableConvention; 4 | import org.apache.calcite.adapter.enumerable.EnumerableRules; 5 | import org.apache.calcite.rel.RelNode; 6 | import org.apache.calcite.rel.externalize.RelWriterImpl; 7 | import org.apache.calcite.rel.rules.CoreRules; 8 | import org.apache.calcite.sql.SqlExplainLevel; 9 | import org.apache.calcite.sql.SqlNode; 10 | import org.apache.calcite.sql.type.SqlTypeName; 11 | import org.apache.calcite.tools.RuleSet; 12 | import org.apache.calcite.tools.RuleSets; 13 | import org.junit.Test; 14 | 15 | import java.io.PrintWriter; 16 | import java.io.StringWriter; 17 | 18 | 
public class OptimizerTest { 19 | @Test 20 | public void test_tpch_q6() throws Exception { 21 | SimpleTable lineitem = SimpleTable.newBuilder("lineitem") 22 | .addField("l_quantity", SqlTypeName.DECIMAL) 23 | .addField("l_extendedprice", SqlTypeName.DECIMAL) 24 | .addField("l_discount", SqlTypeName.DECIMAL) 25 | .addField("l_shipdate", SqlTypeName.DATE) 26 | .withRowCount(60_000L) 27 | .build(); 28 | 29 | SimpleSchema schema = SimpleSchema.newBuilder("tpch").addTable(lineitem).build(); 30 | 31 | Optimizer optimizer = Optimizer.create(schema); 32 | 33 | String sql = 34 | "select\n" + 35 | " sum(l.l_extendedprice * l.l_discount) as revenue\n" + 36 | "from\n" + 37 | " lineitem l\n" + 38 | "where\n" + 39 | " l.l_shipdate >= ?\n" + 40 | " and l.l_shipdate < ?\n" + 41 | " and l.l_discount between (? - 0.01) AND (? + 0.01)\n" + 42 | " and l.l_quantity < ?"; 43 | 44 | SqlNode sqlTree = optimizer.parse(sql); 45 | SqlNode validatedSqlTree = optimizer.validate(sqlTree); 46 | RelNode relTree = optimizer.convert(validatedSqlTree); 47 | 48 | print("AFTER CONVERSION", relTree); 49 | 50 | RuleSet rules = RuleSets.ofList( 51 | CoreRules.FILTER_TO_CALC, 52 | CoreRules.PROJECT_TO_CALC, 53 | CoreRules.FILTER_CALC_MERGE, 54 | CoreRules.PROJECT_CALC_MERGE, 55 | EnumerableRules.ENUMERABLE_TABLE_SCAN_RULE, 56 | EnumerableRules.ENUMERABLE_PROJECT_RULE, 57 | EnumerableRules.ENUMERABLE_FILTER_RULE, 58 | EnumerableRules.ENUMERABLE_CALC_RULE, 59 | EnumerableRules.ENUMERABLE_AGGREGATE_RULE 60 | ); 61 | 62 | RelNode optimizerRelTree = optimizer.optimize( 63 | relTree, 64 | relTree.getTraitSet().plus(EnumerableConvention.INSTANCE), 65 | rules 66 | ); 67 | 68 | print("AFTER OPTIMIZATION", optimizerRelTree); 69 | } 70 | 71 | private void print(String header, RelNode relTree) { 72 | StringWriter sw = new StringWriter(); 73 | 74 | sw.append(header).append(":").append("\n"); 75 | 76 | RelWriterImpl relWriter = new RelWriterImpl(new PrintWriter(sw), SqlExplainLevel.ALL_ATTRIBUTES, true); 77 | 78 | 
relTree.explain(relWriter); 79 | 80 | System.out.println(sw.toString()); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /02-custom-calcite-trait/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | 7 | Custom Calcite Trait 8 | com.querifylabs.blog 9 | custom-calcite-trait 10 | 1.0-SNAPSHOT 11 | jar 12 | https://www.querifylabs.com 13 | 14 | 15 | UTF-8 16 | 1.8 17 | 1.8 18 | 1.25.0 19 | 30.0-jre 20 | 4.11 21 | 22 | 23 | 24 | 25 | org.apache.calcite 26 | calcite-core 27 | ${calcite.version} 28 | 29 | 30 | 31 | com.google.guava 32 | guava 33 | ${guava.version} 34 | 35 | 36 | 37 | junit 38 | junit 39 | ${junit.version} 40 | test 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | maven-compiler-plugin 49 | 3.8.0 50 | 51 | 52 | maven-surefire-plugin 53 | 2.22.1 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /02-custom-calcite-trait/src/main/java/com/querifylabs/blog/trait/Distribution.java: -------------------------------------------------------------------------------- 1 | package com.querifylabs.blog.trait; 2 | 3 | import org.apache.calcite.plan.RelOptPlanner; 4 | import org.apache.calcite.plan.RelTrait; 5 | import org.apache.calcite.plan.RelTraitDef; 6 | 7 | public class Distribution implements RelTrait { 8 | 9 | public static final Distribution ANY = new Distribution(Type.ANY); 10 | public static final Distribution PARTITIONED = new Distribution(Type.PARTITIONED); 11 | public static final Distribution SINGLETON = new Distribution(Type.SINGLETON); 12 | 13 | private final Type type; 14 | 15 | private Distribution(Type type) { 16 | this.type = type; 17 | } 18 | 19 | @SuppressWarnings("rawtypes") 20 | @Override 21 | public RelTraitDef getTraitDef() { 22 | return DistributionTraitDef.INSTANCE; 23 | } 24 | 25 | @Override 26 | public boolean satisfies(RelTrait toTrait) { 27 | Distribution toTrait0 = 
(Distribution) toTrait; 28 | 29 | if (toTrait0.type == Type.ANY) { 30 | return true; 31 | } 32 | 33 | return this.type.equals(toTrait0.type); 34 | } 35 | 36 | @Override 37 | public void register(RelOptPlanner planner) { 38 | // No-op. 39 | } 40 | 41 | @Override 42 | public String toString() { 43 | return type.name(); 44 | } 45 | 46 | enum Type { 47 | ANY, 48 | PARTITIONED, 49 | SINGLETON 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /02-custom-calcite-trait/src/main/java/com/querifylabs/blog/trait/DistributionTraitDef.java: -------------------------------------------------------------------------------- 1 | package com.querifylabs.blog.trait; 2 | 3 | import org.apache.calcite.plan.RelOptPlanner; 4 | import org.apache.calcite.plan.RelTraitDef; 5 | import org.apache.calcite.rel.RelNode; 6 | 7 | public class DistributionTraitDef extends RelTraitDef { 8 | 9 | public static DistributionTraitDef INSTANCE = new DistributionTraitDef(); 10 | 11 | private DistributionTraitDef() { 12 | // No-op. 
13 | } 14 | 15 | @Override 16 | public Class getTraitClass() { 17 | return Distribution.class; 18 | } 19 | 20 | @Override 21 | public String getSimpleName() { 22 | return "DISTRIBUTION"; 23 | } 24 | 25 | @Override 26 | public RelNode convert( 27 | RelOptPlanner planner, 28 | RelNode rel, 29 | Distribution toTrait, 30 | boolean allowInfiniteCostConverters 31 | ) { 32 | Distribution fromTrait = rel.getTraitSet().getTrait(DistributionTraitDef.INSTANCE); 33 | 34 | if (fromTrait.satisfies(toTrait)) { 35 | return rel; 36 | } 37 | 38 | return new ExchangeRel( 39 | rel.getCluster(), 40 | rel.getTraitSet().plus(toTrait), 41 | rel 42 | ); 43 | } 44 | 45 | @Override 46 | public boolean canConvert( 47 | RelOptPlanner planner, 48 | Distribution fromTrait, 49 | Distribution toTrait 50 | ) { 51 | return true; 52 | } 53 | 54 | @Override 55 | public Distribution getDefault() { 56 | return Distribution.ANY; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /02-custom-calcite-trait/src/main/java/com/querifylabs/blog/trait/ExchangeRel.java: -------------------------------------------------------------------------------- 1 | package com.querifylabs.blog.trait; 2 | 3 | import org.apache.calcite.plan.RelOptCluster; 4 | import org.apache.calcite.plan.RelTraitSet; 5 | import org.apache.calcite.rel.RelNode; 6 | import org.apache.calcite.rel.SingleRel; 7 | 8 | import java.util.List; 9 | 10 | public class ExchangeRel extends SingleRel { 11 | public ExchangeRel( 12 | RelOptCluster cluster, 13 | RelTraitSet traits, 14 | RelNode input 15 | ) { 16 | super(cluster, traits, input); 17 | } 18 | 19 | @Override 20 | public RelNode copy(RelTraitSet traitSet, List inputs) { 21 | return new ExchangeRel(getCluster(), traitSet, inputs.get(0)); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /02-custom-calcite-trait/src/main/java/com/querifylabs/blog/trait/Schema.java: 
-------------------------------------------------------------------------------- 1 | package com.querifylabs.blog.trait; 2 | 3 | import org.apache.calcite.schema.SchemaVersion; 4 | import org.apache.calcite.schema.impl.AbstractSchema; 5 | 6 | import java.util.HashMap; 7 | import java.util.Map; 8 | 9 | public class Schema extends AbstractSchema { 10 | 11 | private final String schemaName; 12 | private final Map tableMap; 13 | 14 | private Schema(String schemaName, Map tableMap) { 15 | this.schemaName = schemaName; 16 | this.tableMap = tableMap; 17 | } 18 | 19 | public String getSchemaName() { 20 | return schemaName; 21 | } 22 | 23 | @Override 24 | public Map getTableMap() { 25 | return tableMap; 26 | } 27 | 28 | @Override 29 | public org.apache.calcite.schema.Schema snapshot(SchemaVersion version) { 30 | return this; 31 | } 32 | 33 | public static Builder newBuilder(String schemaName) { 34 | return new Builder(schemaName); 35 | } 36 | 37 | public static final class Builder { 38 | 39 | private final String schemaName; 40 | private final Map tableMap = new HashMap<>(); 41 | 42 | private Builder(String schemaName) { 43 | if (schemaName == null || schemaName.isEmpty()) { 44 | throw new IllegalArgumentException("Schema name cannot be null or empty"); 45 | } 46 | 47 | this.schemaName = schemaName; 48 | } 49 | 50 | public Builder addTable(Table table) { 51 | if (tableMap.containsKey(table.getTableName())) { 52 | throw new IllegalArgumentException("Table already defined: " + table.getTableName()); 53 | } 54 | 55 | tableMap.put(table.getTableName(), table); 56 | 57 | return this; 58 | } 59 | 60 | public Schema build() { 61 | /* Copy the map so that later addTable() calls on this builder cannot alter the schema returned here. */ return new Schema(schemaName, new HashMap<>(tableMap)); 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /02-custom-calcite-trait/src/main/java/com/querifylabs/blog/trait/Table.java: -------------------------------------------------------------------------------- 1 | package com.querifylabs.blog.trait; 2 | 3 | 
import org.apache.calcite.plan.RelOptTable;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.logical.LogicalTableScan;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rel.type.RelDataTypeFieldImpl;
import org.apache.calcite.rel.type.RelRecordType;
import org.apache.calcite.rel.type.StructKind;
import org.apache.calcite.schema.TranslatableTable;
import org.apache.calcite.schema.impl.AbstractTable;
import org.apache.calcite.sql.type.SqlTypeName;

import java.util.ArrayList;
import java.util.List;

/**
 * A table with a name, a distribution, and a list of typed fields. Instances are created
 * through {@link Builder}.
 */
public class Table extends AbstractTable implements TranslatableTable {

    private final String tableName;
    private final Distribution distribution;
    private final List<String> fieldNames;
    private final List<SqlTypeName> fieldTypes;

    /** Row type, created on the first call to {@link #getRowType(RelDataTypeFactory)}. */
    private RelDataType rowType;

    private Table(
        String tableName,
        Distribution distribution,
        List<String> fieldNames,
        List<SqlTypeName> fieldTypes
    ) {
        this.tableName = tableName;
        this.distribution = distribution;
        this.fieldNames = fieldNames;
        this.fieldTypes = fieldTypes;
    }

    /**
     * Returns the name given to the table at build time.
     */
    public String getTableName() {
        return tableName;
    }

    /**
     * Returns the row type of the table: one field per added column, in the order of addition.
     * The type is built on the first call with the supplied factory and reused afterwards.
     */
    @Override
    public RelDataType getRowType(RelDataTypeFactory typeFactory) {
        if (rowType == null) {
            List<RelDataTypeField> fields = new ArrayList<>(fieldNames.size());

            for (int i = 0; i < fieldNames.size(); i++) {
                RelDataType fieldType = typeFactory.createSqlType(fieldTypes.get(i));
                RelDataTypeField field = new RelDataTypeFieldImpl(fieldNames.get(i), i, fieldType);
                fields.add(field);
            }

            rowType = new RelRecordType(StructKind.PEEK_FIELDS, fields, false);
        }

        return rowType;
    }

    /**
     * Creates a logical scan over this table whose trait set is built from the table's distribution.
     */
    @Override
    public RelNode toRel(RelOptTable.ToRelContext context, RelOptTable relOptTable) {
        RelTraitSet traitSet = context.getCluster().traitSetOf(distribution);

        return new LogicalTableScan(
            context.getCluster(),
            traitSet,
            context.getTableHints(),
            relOptTable
        );
    }

    /**
     * Starts building a table with the given name and distribution.
     *
     * @throws IllegalArgumentException if the name is null or empty
     */
    public static Builder newBuilder(String tableName, Distribution distribution) {
        return new Builder(tableName, distribution);
    }

    /**
     * Collects fields and produces a {@link Table}.
     */
    public static final class Builder {

        private final String tableName;
        private final Distribution distribution;
        private final List<String> fieldNames = new ArrayList<>();
        private final List<SqlTypeName> fieldTypes = new ArrayList<>();

        private Builder(String tableName, Distribution distribution) {
            if (tableName == null || tableName.isEmpty()) {
                throw new IllegalArgumentException("Table name cannot be null or empty");
            }

            this.tableName = tableName;
            this.distribution = distribution;
        }

        /**
         * Appends a field to the table.
         *
         * @throws IllegalArgumentException if the name is null, empty, or already used
         */
        public Builder addField(String name, SqlTypeName typeName) {
            if (name == null || name.isEmpty()) {
                throw new IllegalArgumentException("Field name cannot be null or empty");
            }

            if (fieldNames.contains(name)) {
                throw new IllegalArgumentException("Field already defined: " + name);
            }

            fieldNames.add(name);
            fieldTypes.add(typeName);

            return this;
        }

        /**
         * Creates the table from the fields added so far.
         *
         * @throws IllegalStateException if no field was added
         */
        public Table build() {
            if (fieldNames.isEmpty()) {
                throw new IllegalStateException("Table must have at least one field");
            }

            // Copy the lists so that reusing the builder cannot alter an already built table.
            return new Table(tableName, distribution, new ArrayList<>(fieldNames), new ArrayList<>(fieldTypes));
        }
    }
}

--------------------------------------------------------------------------------
/02-custom-calcite-trait/src/test/java/com/querifylabs/blog/trait/TraitTest.java:
--------------------------------------------------------------------------------
package com.querifylabs.blog.trait;

import org.apache.calcite.config.CalciteConnectionConfig;
import
org.apache.calcite.jdbc.CalciteSchema; 5 | import org.apache.calcite.jdbc.JavaTypeFactoryImpl; 6 | import org.apache.calcite.plan.ConventionTraitDef; 7 | import org.apache.calcite.plan.RelOptCluster; 8 | import org.apache.calcite.plan.RelTraitSet; 9 | import org.apache.calcite.plan.volcano.AbstractConverter; 10 | import org.apache.calcite.plan.volcano.VolcanoPlanner; 11 | import org.apache.calcite.prepare.CalciteCatalogReader; 12 | import org.apache.calcite.prepare.Prepare; 13 | import org.apache.calcite.rel.RelNode; 14 | import org.apache.calcite.rel.core.RelFactories; 15 | import org.apache.calcite.rel.externalize.RelWriterImpl; 16 | import org.apache.calcite.rel.type.RelDataTypeFactory; 17 | import org.apache.calcite.rex.RexBuilder; 18 | import org.apache.calcite.sql.SqlExplainLevel; 19 | import org.apache.calcite.sql.type.SqlTypeName; 20 | import org.apache.calcite.tools.Programs; 21 | import org.apache.calcite.tools.RelBuilder; 22 | import org.apache.calcite.tools.RelBuilderFactory; 23 | import org.apache.calcite.tools.RuleSet; 24 | import org.apache.calcite.tools.RuleSets; 25 | import org.junit.Test; 26 | 27 | import java.io.PrintWriter; 28 | import java.io.StringWriter; 29 | import java.util.Collections; 30 | 31 | import static org.junit.Assert.assertSame; 32 | 33 | public class TraitTest { 34 | 35 | private static final String TABLE_PARTITIONED = "partitioned"; 36 | private static final String TABLE_SINGLETON = "singleton"; 37 | 38 | @Test 39 | public void testEnforceSingletonOnPartitioned() { 40 | enforceSingleton(TABLE_PARTITIONED); 41 | } 42 | 43 | @Test 44 | public void testEnforceSingletonOnSingleton() { 45 | enforceSingleton(TABLE_SINGLETON); 46 | } 47 | 48 | private static void enforceSingleton(String tableName) { 49 | // Prepare supporting objects. 50 | Prepare.CatalogReader schema = createSchema(); 51 | VolcanoPlanner planner = createPlanner(); 52 | 53 | // Create a table scan on the desired table. 
54 | RelOptCluster cluster = RelOptCluster.create(planner, new RexBuilder(schema.getTypeFactory())); 55 | RelBuilderFactory factory = RelBuilder.proto(RelFactories.DEFAULT_TABLE_SCAN_FACTORY); 56 | RelBuilder relBuilder = factory.create(cluster, schema); 57 | RelNode node = relBuilder.scan(tableName).build(); 58 | print("BEFORE", node); 59 | 60 | // Use the built-in rule that will expand abstract converters. 61 | RuleSet rules = RuleSets.ofList(AbstractConverter.ExpandConversionRule.INSTANCE); 62 | 63 | // Prepare the desired traits with the SINGLETON distribution. 64 | RelTraitSet desiredTraits = node.getTraitSet().plus(Distribution.SINGLETON); 65 | 66 | // Use the planner to enforce the desired traits. 67 | RelNode optimizedNode = Programs.of(rules).run( 68 | planner, 69 | node, 70 | desiredTraits, 71 | Collections.emptyList(), 72 | Collections.emptyList() 73 | ); 74 | 75 | print("AFTER", optimizedNode); 76 | 77 | assertSame(Distribution.SINGLETON, optimizedNode.getTraitSet().getTrait(DistributionTraitDef.INSTANCE)); 78 | } 79 | 80 | private static Prepare.CatalogReader createSchema() { 81 | // Table with PARTITIONED distribution. 82 | Table table1 = Table.newBuilder(TABLE_PARTITIONED, Distribution.PARTITIONED) 83 | .addField("field", SqlTypeName.DECIMAL).build(); 84 | 85 | // Table with SINGLETON distribution. 
86 | Table table2 = Table.newBuilder(TABLE_SINGLETON, Distribution.SINGLETON) 87 | .addField("field", SqlTypeName.DECIMAL).build(); 88 | 89 | Schema schema = Schema.newBuilder("schema").addTable(table1).addTable(table2).build(); 90 | 91 | RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl(); 92 | 93 | CalciteConnectionConfig config = CalciteConnectionConfig.DEFAULT; 94 | 95 | CalciteSchema rootSchema = CalciteSchema.createRootSchema(false, false); 96 | rootSchema.add(schema.getSchemaName(), schema); 97 | 98 | return new CalciteCatalogReader( 99 | rootSchema, 100 | Collections.singletonList(schema.getSchemaName()), 101 | typeFactory, 102 | config 103 | ); 104 | } 105 | 106 | private static VolcanoPlanner createPlanner() { 107 | VolcanoPlanner planner = new VolcanoPlanner(); 108 | 109 | // Register distribution trait. 110 | planner.addRelTraitDef(ConventionTraitDef.INSTANCE); 111 | planner.addRelTraitDef(DistributionTraitDef.INSTANCE); 112 | 113 | // DO NOT USE IN PRODUCTION: a quirk to allow Apache Calcite calculate costs for logical nodes. 114 | // Without this line we would have to use a custom convention, that makes the example more complex. 
115 | planner.setNoneConventionHasInfiniteCost(false); 116 | 117 | return planner; 118 | } 119 | 120 | private static void print(String header, RelNode relTree) { 121 | StringWriter sw = new StringWriter(); 122 | 123 | sw.append(header).append(":").append("\n"); 124 | 125 | RelWriterImpl relWriter = new RelWriterImpl(new PrintWriter(sw), SqlExplainLevel.DIGEST_ATTRIBUTES, true); 126 | 127 | relTree.explain(relWriter); 128 | 129 | System.out.println(sw.toString()); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Querify Labs Blog Code Samples 2 | Supporting code for https://www.querifylabs.com/blog 3 | -------------------------------------------------------------------------------- /join-enumerator/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | 7 | Simple Join Enumerator with Cross-Product Suppression 8 | com.querifylabs.blog 9 | querifylabs-join-enumerator 10 | 1.0-SNAPSHOT 11 | jar 12 | https://www.querifylabs.com 13 | 14 | 15 | UTF-8 16 | 11 17 | 11 18 | 4.11 19 | 20 | 21 | 22 | 23 | junit 24 | junit 25 | ${junit.version} 26 | test 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | maven-compiler-plugin 35 | 3.8.0 36 | 37 | 38 | maven-surefire-plugin 39 | 2.22.1 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /join-enumerator/src/main/java/com/querifylabs/blog/joins/JoinEnumerator.java: -------------------------------------------------------------------------------- 1 | package com.querifylabs.blog.joins; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashSet; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.Set; 8 | import java.util.TreeSet; 9 | import java.util.concurrent.ConcurrentHashMap; 10 | import java.util.concurrent.CountDownLatch; 11 | import 
java.util.concurrent.ForkJoinPool;
import java.util.concurrent.atomic.AtomicLong;

/**
 * A naive implementation of a join enumerator with cross-product suppression.
 * <p>
 * Constructs the possible bushy parenthesizations for the N inputs. Then,
 * creates possible orders of leaves. Each parenthesization is combined
 * with each order of leaves, and then checked for the presence of cross-products.
 * <p>
 * The algorithm assumes that every join is an inner-join.
 * <p>
 * The algorithm is very simple and convenient for the educational purposes. However,
 * it is very inefficient and cannot be used to plan join graphs with more than eight
 * tables.
 * <p>
 * Consider the join graph A-B-C. The valid parenthesizations are ((T1xT2)xT3) and
 * (T1x(T2xT3)). The valid leaf orders are ABC, ACB, BAC, BCA, CAB, CBA. Combining
 * these two we get 12 bushy join orders. Cross-products are not present in the
 * following join orders: (AB)C, A(BC), A(CB), (BA)C, (BC)A, C(AB), (CB)A, C(BA),
 * giving us 8 cross-product free join orders.
 */
public class JoinEnumerator {
    /** Unique table names observed so far. */
    private final List<String> tableNames = new ArrayList<>();

    /** Join conditions. */
    private final Set<JoinConditionKey> conditions = new HashSet<>();

    /** Cached digests for the given set of inputs. */
    private final Map<String, TreeSet<Integer>> digestToInputs = new ConcurrentHashMap<>();

    /** Whether the given join graph is connected. */
    private final Map<JoinKey, Boolean> connected = new ConcurrentHashMap<>();

    /**
     * Count cross-product free join orders for the submitted join graph.
     * <p>
     * Every combination of a leaf order and a join template is checked by a task submitted to
     * the common fork-join pool; the method blocks until all tasks have finished.
     *
     * @return the number of cross-product free join orders; {@code 0} when no join condition
     *         has been added, {@code 1} when only one table is known
     * @throws ArithmeticException if the number of combinations does not fit into an {@code int}
     * @throws RuntimeException if the calling thread is interrupted while waiting
     */
    public long count() {
        // Nothing to enumerate; also keeps the sanity checks below away from fact(0)/catalan(-1).
        if (inputCount() == 0) {
            return 0;
        }

        if (inputCount() == 1) {
            return 1;
        }

        // Clear the state.
        digestToInputs.clear();
        connected.clear();

        // Generate leaf orders.
        List<List<Integer>> orders = generateLeafOrders();
        assert orders.size() == JoinEnumeratorUtils.fact(inputCount());

        // Generate associations.
        Set<Join> templates = generateJoinTemplates();
        assert templates.size() == JoinEnumeratorUtils.catalan(inputCount() - 1);

        // Combine leaf orders and associations.
        AtomicLong counter = new AtomicLong();
        // Fail loudly instead of creating a latch with a wrapped-around count.
        CountDownLatch doneLatch = new CountDownLatch(Math.multiplyExact(orders.size(), templates.size()));
        for (List<Integer> order : orders) {
            for (Join template : templates) {
                checkConnectedAsync(order, template, counter, doneLatch);
            }
        }

        // Await completion.
        try {
            doneLatch.await();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException("Interrupted", e);
        }

        return counter.get();
    }

    /**
     * Add join condition between two tables. Tables are registered on first use.
     */
    public void addJoinCondition(String table1, String table2) {
        conditions.add(new JoinConditionKey(tableOrdinal(table1), tableOrdinal(table2)));
    }

    /**
     * Map unique table name to ordinal, registering the name if it has not been seen before.
     */
    private int tableOrdinal(String name) {
        int index = tableNames.indexOf(name);
        if (index == -1) {
            index = tableNames.size();
            tableNames.add(name);
        }
        return index;
    }

    /**
     * Number of inputs.
     */
    private int inputCount() {
        return tableNames.size();
    }

    /**
     * Increment the counter if the given join order forms a connected graph.
     * The latch is counted down exactly once per call, even if the check fails with an exception.
     */
    private void checkConnectedAsync(
        List<Integer> order,
        Join template,
        AtomicLong counter,
        CountDownLatch doneLatch
    ) {
        ForkJoinPool.commonPool().execute(() -> {
            try {
                Join join = associate(template, order);
                ConnectedJoinShuttle shuttle = new ConnectedJoinShuttle();
                join.accept(shuttle);
                if (shuttle.connected) {
                    counter.incrementAndGet();
                }
            } finally {
                // Always release the waiter; otherwise a failed task would block count() forever.
                doneLatch.countDown();
            }
        });
    }

    /**
     * Whether there is a join condition between two nodes. The answer is cached per pair of
     * node digests.
     */
    private boolean hasJoinCondition(Node left, Node right) {
        String leftDigest = left.toString();
        String rightDigest = right.toString();
        JoinKey key = new JoinKey(leftDigest, rightDigest);
        Boolean res = connected.get(key);
        if (res != null) {
            return res;
        }

        Set<Integer> leftInputs = collectInputs(leftDigest, left);
        Set<Integer> rightInputs = collectInputs(rightDigest, right);
        for (int leftInput : leftInputs) {
            for (int rightInput : rightInputs) {
                if (hasJoinCondition(leftInput, rightInput)) {
                    connected.put(key, true);
                    return true;
                }
            }
        }

        connected.put(key, false);
        return false;
    }

    /**
     * Whether there is a join condition between two tables. If not, the join of two inputs is a cross-product.
     */
    private boolean hasJoinCondition(int key1, int key2) {
        return conditions.contains(new JoinConditionKey(key1, key2));
    }

    /**
     * Replace the leaves of the template with the given order of leaves.
     */
    private static Join associate(Join template, List<Integer> order) {
        return (Join) template.accept(new Shuttle() {
            @Override
            public Node visitLeaf(Leaf leaf) {
                return new Leaf(order.get(leaf.index));
            }

            @Override
            public Node visitJoin(Join join) {
                return join;
            }
        });
    }

    /**
     * Collect all inputs present in the given node. For example, A join (B join C) contains three inputs: A, B, and C.
     * The result is cached per digest.
     */
    private TreeSet<Integer> collectInputs(String digest, Node node) {
        TreeSet<Integer> res = digestToInputs.get(digest);
        if (res == null) {
            TreeSet<Integer> res0 = new TreeSet<>();
            node.accept(new Shuttle() {
                @Override
                public Node visitLeaf(Leaf leaf) {
                    res0.add(leaf.index);
                    return leaf;
                }

                @Override
                public Node visitJoin(Join join) {
                    return join;
                }
            });
            digestToInputs.put(digest, res0);
            res = res0;
        }
        return res;
    }

    /**
     * Generate all possible orders of leaves. For example, for inputs A, B, and C possible orders are: ABC, ACB,
     * BAC, BCA, CAB, CBA.
     */
    private List<List<Integer>> generateLeafOrders() {
        List<List<Integer>> ress = new ArrayList<>();
        List<Integer> currentOrder = new ArrayList<>(inputCount());
        generateLeafOrders(currentOrder, ress);
        return ress;
    }

    private void generateLeafOrders(List<Integer> currentOrder, List<List<Integer>> ress) {
        if (currentOrder.size() == inputCount()) {
            ress.add(new ArrayList<>(currentOrder));
            return;
        }

        for (int i = 0; i < inputCount(); i++) {
            if (currentOrder.contains(i)) {
                continue;
            }
            currentOrder.add(i);
            generateLeafOrders(currentOrder, ress);
            // Backtrack: drop the last element by position.
            currentOrder.remove(currentOrder.size() - 1);
        }
    }

    /**
     * Generate possible associations of inputs. For example, given the inputs T1, T2, and T3 in that order, the
     * possible associations are (T1xT2)xT3 and T1x(T2xT3).
     */
    private Set<Join> generateJoinTemplates() {
        Set<Join> ress = new HashSet<>();
        List<Node> nodes = new ArrayList<>(inputCount());
        for (int i = 0; i < inputCount(); i++) {
            nodes.add(new Leaf(i));
        }
        generateJoinTemplates(nodes, ress);
        return ress;
    }

    private static void generateJoinTemplates(List<Node> nodes, Set<Join> ress) {
        if (nodes.size() == 1) {
            Node join = nodes.get(0);
            assert join instanceof Join;
            ress.add((Join) join);
            return;
        }

        int joinCount = nodes.size() - 1;
        for (int i = 0; i < joinCount; i++) {
            // Merge two adjacent nodes into a join, recurse, then restore the list.
            Node left = nodes.remove(i);
            Node right = nodes.remove(i);
            Join join = new Join(left, right);
            nodes.add(i, join);

            generateJoinTemplates(nodes, ress);

            Node removedJoin = nodes.remove(i);
            assert join == removedJoin;
            nodes.add(i, right);
            nodes.add(i, left);
        }
    }

    /**
     * Node that represents either a leaf input or a join.
     */
    private static abstract class Node {
        public abstract Node accept(Shuttle shuttle);
    }

    /**
     * Leaf input.
     */
    private static class Leaf extends Node {
        private final int index;

        private Leaf(int index) {
            this.index = index;
        }

        @Override
        public Node accept(Shuttle shuttle) {
            return shuttle.visitLeaf(this);
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            Leaf leaf = (Leaf) o;
            return index == leaf.index;
        }

        @Override
        public int hashCode() {
            return index;
        }

        @Override
        public String toString() {
            return Integer.toString(index);
        }
    }

    /**
     * Join of two inputs.
     */
    private static class Join extends Node {
        private final Node left;
        private final Node right;

        public Join(Node left, Node right) {
            this.left = left;
            this.right = right;
        }

        /**
         * Visits both children first, then passes a new join of the visited children to the shuttle.
         */
        @Override
        public Node accept(Shuttle shuttle) {
            Node newLeft = left.accept(shuttle);
            Node newRight = right.accept(shuttle);
            return shuttle.visitJoin(new Join(newLeft, newRight));
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            Join join = (Join) o;
            if (!left.equals(join.left)) return false;
            return right.equals(join.right);
        }

        @Override
        public int hashCode() {
            int result = left.hashCode();
            result = 31 * result + right.hashCode();
            return result;
        }

        @Override
        public String toString() {
            return "(" + left + "x" + right + ")";
        }
    }

    /**
     * A visitor that can traverse the Node tree bottom-up and construct the new tree.
     */
    private interface Shuttle {
        Node visitLeaf(Leaf leaf);
        Node visitJoin(Join join);
    }

    /**
     * Visitor that checks whether all Join nodes in the tree have join conditions.
     */
    private class ConnectedJoinShuttle implements Shuttle {
        private boolean connected = true;

        @Override
        public Node visitLeaf(Leaf leaf) {
            return leaf;
        }

        @Override
        public Node visitJoin(Join join) {
            if (connected && !hasJoinCondition(join.left, join.right)) {
                connected = false;
            }
            return join;
        }
    }

    /**
     * A key for the join condition. The pair is stored in ascending order, so the key does not
     * depend on the order of the constructor arguments.
     */
    private static class JoinConditionKey {
        private final int first;
        private final int second;

        private JoinConditionKey(int first, int second) {
            if (first > second) {
                this.first = second;
                this.second = first;
            } else {
                this.first = first;
                this.second = second;
            }
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            JoinConditionKey that = (JoinConditionKey) o;
            if (first != that.first) return false;
            return second == that.second;
        }

        @Override
        public int hashCode() {
            int result = first;
            result = 31 * result + second;
            return result;
        }
    }

    /**
     * A key that uniquely identifies a join of two inputs.
     */
    private static class JoinKey {
        private final String leftDigest;
        private final String rightDigest;

        private JoinKey(String leftDigest, String rightDigest) {
            this.leftDigest = leftDigest;
            this.rightDigest = rightDigest;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            JoinKey joinKey = (JoinKey) o;
            if (!leftDigest.equals(joinKey.leftDigest)) return false;
            return rightDigest.equals(joinKey.rightDigest);
        }

        @Override
        public int hashCode() {
            int result = leftDigest.hashCode();
            result = 31 * result + rightDigest.hashCode();
            return result;
        }
    }
}

--------------------------------------------------------------------------------
/join-enumerator/src/main/java/com/querifylabs/blog/joins/JoinEnumeratorUtils.java:
--------------------------------------------------------------------------------
package com.querifylabs.blog.joins;

public class JoinEnumeratorUtils {
    private JoinEnumeratorUtils() {}

    /**
     * Calculates factorial of n.
     *
     * @param n non-negative argument; {@code fact(0)} is {@code 1}
     * @return n!
     * @throws IllegalArgumentException if n is negative
     * @throws ArithmeticException if the result does not fit into a {@code long}
     */
    public static Long fact(int n) {
        if (n < 0) {
            throw new IllegalArgumentException("n must be non-negative: " + n);
        }

        long result = 1L;
        for (long i = 2; i <= n; i++) {
            result = Math.multiplyExact(result, i);
        }
        return result;
    }

    /**
     * Calculates Catalan number of n.
     *
     * @param n non-negative index; {@code catalan(0)} is {@code 1}
     * @return the n-th Catalan number
     * @throws IllegalArgumentException if n is negative
     * @throws ArithmeticException if an intermediate product does not fit into a {@code long}
     */
    public static Long catalan(int n) {
        if (n < 0) {
            throw new IllegalArgumentException("n must be non-negative: " + n);
        }

        // C(0) = 1, C(k + 1) = C(k) * 2 * (2k + 1) / (k + 2); the division is always exact.
        // This avoids the (2n)! intermediate, which overflows a long long before C(n) does.
        long result = 1L;
        for (long k = 0; k < n; k++) {
            result = Math.multiplyExact(result, 2 * (2 * k + 1)) / (k + 2);
        }
        return result;
    }
}

--------------------------------------------------------------------------------
/join-enumerator/src/test/java/com/querifylabs/blog/joins/JoinEnumeratorTcpdsTest.java:
--------------------------------------------------------------------------------
package com.querifylabs.blog.joins;

import org.junit.Test;

import static org.junit.Assert.assertEquals;

/**
 * Estimate the number of joins for the TPC-DS queries.
 */
public class JoinEnumeratorTcpdsTest {
    /** The TPC-DS query 17 contains 211200 valid cross-product free join orders. */
    private static final long TPCDS_17 = 211200;

    @Test
    public void testTpcdsQ17() {
        JoinEnumerator topology = new JoinEnumerator();

        // d1.d_date_sk = ss_sold_date_sk
        topology.addJoinCondition("date_dim d1", "store_sales");

        // i_item_sk = ss_item_sk
        topology.addJoinCondition("item", "store_sales");

        // s_store_sk = ss_store_sk
        topology.addJoinCondition("store", "store_sales");

        // ss_customer_sk = sr_customer_sk
        // ss_item_sk = sr_item_sk
        // ss_ticket_number = sr_ticket_number
        topology.addJoinCondition("store_sales", "store_returns");

        // sr_returned_date_sk = d2.d_date_sk
        topology.addJoinCondition("store_returns", "date_dim d2");

        // sr_customer_sk = cs_bill_customer_sk
        // sr_item_sk = cs_item_sk
        topology.addJoinCondition("store_returns", "catalog_sales");

        // cs_sold_date_sk = d3.d_date_sk
        topology.addJoinCondition("catalog_sales", "date_dim d3");

        assertEquals(TPCDS_17, topology.count());
    }
}

--------------------------------------------------------------------------------
/join-enumerator/src/test/java/com/querifylabs/blog/joins/JoinEnumeratorTest.java:
--------------------------------------------------------------------------------
package com.querifylabs.blog.joins;

import org.junit.Test;

import static com.querifylabs.blog.joins.JoinEnumeratorUtils.catalan;
import static com.querifylabs.blog.joins.JoinEnumeratorUtils.fact;
import static org.junit.Assert.assertEquals;

/**
 * Test the estimated number of joins for common join topologies: chain, star, clique.
 */
public class JoinEnumeratorTest {

    private static final int MIN_INPUTS = 2;
    private static final int MAX_INPUTS = 8;

    @Test
    public void testChain() {
        for (int n = MIN_INPUTS; n <= MAX_INPUTS; n++) {
            var topology = new JoinEnumerator();
            for (int i = 1; i < n; i++) {
                topology.addJoinCondition(table(i-1), table(i));
            }
            long expected = (long)Math.pow(2, n-1) * catalan(n - 1);
            assertEquals(expected, topology.count());
        }
    }

    @Test
    public void testStar() {
        for (int n = MIN_INPUTS; n <= MAX_INPUTS; n++) {
            var topology = new JoinEnumerator();
            for (int i = 1; i < n; i++) {
                topology.addJoinCondition(table(0), table(i));
            }
            long expected = (long)Math.pow(2, n-1) * fact(n - 1);
            assertEquals(expected, topology.count());
        }
    }

    @Test
    public void testClique() {
        for (int n = MIN_INPUTS; n <= MAX_INPUTS; n++) {
            var topology = new JoinEnumerator();
            for (int i = 0; i < n; i++) {
                for (int j = 0; j < n; j++) {
                    if (i == j) {
                        continue;
                    }
                    topology.addJoinCondition(table(i), table(j));
                }
            }
            long expected = fact(n) * catalan(n - 1);
            assertEquals(expected, topology.count());
        }
    }

    private static String table(int index) {
        return "t" + index;
    }
}

--------------------------------------------------------------------------------