├── LICENSE ├── README.md ├── pom.xml └── src ├── main ├── java │ └── pers │ │ └── shezm │ │ └── calcite │ │ ├── csv │ │ ├── CsvEnumerator.java │ │ ├── CsvSchema.java │ │ ├── CsvSchemaFactory.java │ │ └── CsvTable.java │ │ ├── optimizer │ │ ├── converter │ │ │ ├── CSVFilterConverter.java │ │ │ ├── CSVNewProjectConverter.java │ │ │ ├── CSVNewProjectRule.java │ │ │ ├── CSVProjectConverter.java │ │ │ └── CSVTableScanConverter.java │ │ ├── cost │ │ │ ├── CSVRelMdDistinctRowCount.java │ │ │ ├── CSVRelMdRowCount.java │ │ │ └── DefaultRelMetadataProvider.java │ │ └── reloperators │ │ │ ├── CSVFilter.java │ │ │ ├── CSVProject.java │ │ │ ├── CSVRel.java │ │ │ ├── CSVTableScan.java │ │ │ └── NewCsvProject.java │ │ ├── test │ │ ├── Test1.java │ │ ├── Test2.java │ │ ├── Test3.java │ │ ├── Test4.java │ │ ├── Test5.java │ │ └── Test6.java │ │ └── utils │ │ └── CalciteUtil.java └── resources │ ├── TEST01.csv │ ├── TEST02.csv │ └── model.json └── test └── java └── org └── example └── AppTest.java /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 shezm 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# calcite-demo
Practice code for Apache Calcite: a CSV adapter, SQL queries executed through that adapter, SQL parsing and validation, and examples of RBO and CBO optimization.
# Project structure
- pers.shezm.calcite
- - csv : the CSV adapter
- - optimizer : optimizer code, including the custom RelNodes, Converters, and cost calculation
- - test : the main demos, containing the various runnable examples
- - utils : utility classes
# Related blog posts

[深入浅出Calcite与SQL CBO(Cost-Based Optimizer)优化](https://zhuanlan.zhihu.com/p/248796415)

[Hive使用Calcite CBO优化流程及SQL优化实战](https://zhuanlan.zhihu.com/p/258081600)
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.example</groupId>
  <artifactId>calcite-demo</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>calcite-demo</name>
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.calcite</groupId>
      <artifactId>calcite-core</artifactId>
      <version>1.22.0</version>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.54</version>
    </dependency>
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>24.1-jre</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
      <version>1.7.26</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement>
      <plugins>
        <plugin><artifactId>maven-clean-plugin</artifactId><version>3.1.0</version></plugin>
        <plugin><artifactId>maven-resources-plugin</artifactId><version>3.0.2</version></plugin>
        <plugin><artifactId>maven-compiler-plugin</artifactId><version>3.8.0</version></plugin>
        <plugin><artifactId>maven-surefire-plugin</artifactId><version>2.22.1</version></plugin>
        <plugin><artifactId>maven-jar-plugin</artifactId><version>3.0.2</version></plugin>
        <plugin><artifactId>maven-install-plugin</artifactId><version>2.5.2</version></plugin>
        <plugin><artifactId>maven-deploy-plugin</artifactId><version>2.8.2</version></plugin>
        <plugin><artifactId>maven-site-plugin</artifactId><version>3.7.1</version></plugin>
        <plugin><artifactId>maven-project-info-reports-plugin</artifactId><version>3.0.0</version></plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>
--------------------------------------------------------------------------------
/src/main/java/pers/shezm/calcite/csv/CsvEnumerator.java:
--------------------------------------------------------------------------------
package pers.shezm.calcite.csv;

import org.apache.calcite.linq4j.Enumerator;
import org.apache.calcite.util.Source;

import java.io.BufferedReader;
import java.io.IOException;

public class CsvEnumerator<E> implements Enumerator<E> {

    private E current;

    private BufferedReader br;

    public CsvEnumerator(Source source) {
        try {
            this.br = new BufferedReader(source.reader());
            this.br.readLine(); // skip the header row
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    @Override
    public E current() {
        return current;
    }

    @Override
    public boolean moveNext() {
        try {
            String line = br.readLine();
            if (line == null) {
                return false;
            }

            current = (E) line.split(","); // one value per column when the table has several columns
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Restarting the enumeration is not supported here.
     */
    @Override
    public void reset() {
        System.out.println("reset() is not supported");
    }

    /**
     * The underlying reader should be closed here.
     */
    @Override
    public void close() {

    }
}
--------------------------------------------------------------------------------
/src/main/java/pers/shezm/calcite/csv/CsvSchema.java:
--------------------------------------------------------------------------------
package pers.shezm.calcite.csv;

import com.google.common.collect.ImmutableMap;
import com.google.common.io.Resources;
import
org.apache.calcite.schema.Table; 6 | import org.apache.calcite.schema.impl.AbstractSchema; 7 | import org.apache.calcite.util.Source; 8 | import org.apache.calcite.util.Sources; 9 | 10 | import java.net.URL; 11 | import java.util.Map; 12 | 13 | public class CsvSchema extends AbstractSchema { 14 | private Map tableMap; 15 | private String dataFiles; 16 | 17 | public CsvSchema(String dataFile) { 18 | this.dataFiles = dataFile; 19 | } 20 | 21 | @Override 22 | protected Map getTableMap() { 23 | final ImmutableMap.Builder builder = ImmutableMap.builder(); 24 | for (String dataFile : dataFiles.split(",")) { 25 | URL url = Resources.getResource(dataFile); 26 | Source source = Sources.of(url); 27 | builder.put(dataFile.split("\\.")[0], new CsvTable(source)); 28 | } 29 | // 一个数据库有多个表名,这里初始化,大小写要注意了 30 | tableMap = builder.build(); 31 | 32 | return tableMap; 33 | } 34 | } -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/csv/CsvSchemaFactory.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.csv; 2 | 3 | import org.apache.calcite.schema.Schema; 4 | import org.apache.calcite.schema.SchemaFactory; 5 | import org.apache.calcite.schema.SchemaPlus; 6 | 7 | import java.util.Map; 8 | 9 | public class CsvSchemaFactory implements SchemaFactory { 10 | 11 | /** 12 | * parentSchema 他的父节点,一般为root 13 | * name 数据库的名字,它在model中定义的 14 | * operand 也是在mode中定义的,是Map类型,用于传入自定义参数。 15 | */ 16 | @Override 17 | public Schema create(SchemaPlus parentSchema, String name, Map operand) { 18 | return new CsvSchema(String.valueOf(operand.get("dataFile"))); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/csv/CsvTable.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.csv; 2 | 3 | import com.google.common.collect.Lists; 4 | import org.apache.calcite.DataContext; 5 | import org.apache.calcite.linq4j.AbstractEnumerable; 6 | import org.apache.calcite.linq4j.Enumerable; 7 | import org.apache.calcite.linq4j.Enumerator; 8 | import org.apache.calcite.rel.type.RelDataType; 9 | import org.apache.calcite.rel.type.RelDataTypeFactory; 10 | import org.apache.calcite.schema.ScannableTable; 11 | import org.apache.calcite.schema.impl.AbstractTable; 12 | import org.apache.calcite.sql.type.SqlTypeName; 13 | import org.apache.calcite.util.Pair; 14 | import org.apache.calcite.util.Source; 15 | 16 | import java.io.BufferedReader; 17 | import java.io.FileNotFoundException; 18 | import java.io.FileReader; 19 | import java.io.IOException; 20 | import java.util.List; 21 | 22 | public class CsvTable extends AbstractTable implements ScannableTable { 23 | private Source source; 24 | 25 | public CsvTable(Source source) { 26 | this.source = source; 27 | } 28 | 29 | /** 30 | * 获取字段类型 31 | */ 32 | @Override 33 | public RelDataType getRowType(RelDataTypeFactory relDataTypeFactory) { 34 | RelDataTypeFactory typeFactory = relDataTypeFactory; 35 | 36 | // JavaTypeFactory typeFactory = (JavaTypeFactory)relDataTypeFactory; 37 | 38 | List names = Lists.newLinkedList(); 39 | List types = Lists.newLinkedList(); 40 | 41 | try { 42 | BufferedReader reader = new BufferedReader(new FileReader(source.file())); 43 | String line = reader.readLine(); 44 | List lines = Lists.newArrayList(line.split(",")); 45 | lines.forEach(column -> { 46 | String name = column.split(":")[0]; 47 | String type = 
column.split(":")[1]; 48 | names.add(name); 49 | types.add(typeFactory.createSqlType(SqlTypeName.get(type))); 50 | }); 51 | 52 | } catch (FileNotFoundException e) { 53 | e.printStackTrace(); 54 | } catch (IOException e) { 55 | e.printStackTrace(); 56 | } 57 | 58 | return typeFactory.createStructType(Pair.zip(names, types)); 59 | } 60 | 61 | @Override 62 | public Enumerable scan(DataContext dataContext) { 63 | return new AbstractEnumerable() { 64 | @Override 65 | public Enumerator enumerator() { 66 | return new CsvEnumerator<>(source); 67 | } 68 | }; 69 | } 70 | } -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/converter/CSVFilterConverter.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.converter; 2 | 3 | import org.apache.calcite.plan.Convention; 4 | import org.apache.calcite.plan.RelOptRuleCall; 5 | import org.apache.calcite.plan.RelTrait; 6 | import org.apache.calcite.plan.RelTraitSet; 7 | import org.apache.calcite.rel.RelDistributionTraitDef; 8 | import org.apache.calcite.rel.RelNode; 9 | import org.apache.calcite.rel.convert.ConverterRule; 10 | import org.apache.calcite.rel.logical.LogicalFilter; 11 | import pers.shezm.calcite.optimizer.reloperators.CSVFilter; 12 | import pers.shezm.calcite.optimizer.reloperators.CSVRel; 13 | 14 | public class CSVFilterConverter extends ConverterRule { 15 | 16 | public static final CSVFilterConverter INSTANCE = new CSVFilterConverter( 17 | LogicalFilter.class, 18 | Convention.NONE, 19 | CSVRel.CONVENTION, 20 | "CSVFilterConverter" 21 | ); 22 | public CSVFilterConverter(Class clazz, RelTrait in, RelTrait out, String description) { 23 | super(clazz, in, out, description); 24 | } 25 | 26 | /** 27 | * RelOptRuleCall 是专门用来被RelOptRule调用的,包含一个 RelNode 的集合 (Set)。 28 | * @param call 29 | * @return 30 | */ 31 | @Override 32 | public boolean matches(RelOptRuleCall call) { 33 | return super.matches(call); 34 | } 35 | 36 | @Override 37 | public RelNode convert(RelNode rel) { 38 | LogicalFilter filter = (LogicalFilter) rel; 39 | RelNode input = convert(filter.getInput(), filter.getInput().getTraitSet().replace(CSVRel.CONVENTION).simplify()); 40 | return new CSVFilter( 41 | filter.getCluster(), 42 | RelTraitSet.createEmpty().plus(CSVRel.CONVENTION).plus(RelDistributionTraitDef.INSTANCE.getDefault()), 43 | input, 44 | filter.getCondition() 45 | ); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/converter/CSVNewProjectConverter.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.converter; 2 | 3 | import org.apache.calcite.plan.Convention; 4 | import org.apache.calcite.plan.RelOptRuleCall; 5 | import org.apache.calcite.plan.RelTrait; 6 | import org.apache.calcite.plan.RelTraitSet; 7 | import org.apache.calcite.rel.RelDistributionTraitDef; 8 | import org.apache.calcite.rel.RelNode; 9 | import org.apache.calcite.rel.convert.ConverterRule; 10 | import org.apache.calcite.rel.logical.LogicalProject; 11 | import pers.shezm.calcite.optimizer.reloperators.CSVProject; 12 | import pers.shezm.calcite.optimizer.reloperators.CSVRel; 13 | import pers.shezm.calcite.optimizer.reloperators.NewCsvProject; 14 | 15 | public class CSVNewProjectConverter extends ConverterRule { 16 | 17 | public static final CSVNewProjectConverter INSTANCE = new CSVNewProjectConverter( 
18 | LogicalProject.class, 19 | Convention.NONE, 20 | CSVRel.CONVENTION, 21 | "CSVNewProjectConverter" 22 | ); 23 | 24 | public CSVNewProjectConverter(Class clazz, RelTrait in, RelTrait out, String description) { 25 | super(clazz, in, out, description); 26 | } 27 | 28 | @Override 29 | public boolean matches(RelOptRuleCall call) { 30 | return super.matches(call); 31 | } 32 | 33 | @Override 34 | public RelNode convert(RelNode rel) { 35 | LogicalProject logicalProject = (LogicalProject) rel; 36 | RelNode input = convert(logicalProject.getInput(), logicalProject.getInput().getTraitSet().replace(CSVRel.CONVENTION).simplify()); 37 | return new NewCsvProject( 38 | logicalProject.getCluster(), 39 | RelTraitSet.createEmpty().plus(CSVRel.CONVENTION).plus(RelDistributionTraitDef.INSTANCE.getDefault()), 40 | input, 41 | logicalProject.getProjects(), 42 | logicalProject.getRowType() 43 | ); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/converter/CSVNewProjectRule.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.converter; 2 | 3 | import org.apache.calcite.plan.RelOptRule; 4 | import org.apache.calcite.plan.RelOptRuleCall; 5 | import org.apache.calcite.plan.RelTraitSet; 6 | import org.apache.calcite.rel.RelDistributionTraitDef; 7 | import org.apache.calcite.rel.RelNode; 8 | import org.apache.calcite.rel.core.Project; 9 | import org.apache.calcite.rel.core.RelFactories; 10 | import org.apache.calcite.rel.type.RelDataType; 11 | import org.apache.calcite.rex.RexNode; 12 | import org.apache.calcite.rex.RexUtil; 13 | import org.apache.calcite.tools.RelBuilderFactory; 14 | import pers.shezm.calcite.optimizer.reloperators.CSVProject; 15 | import pers.shezm.calcite.optimizer.reloperators.CSVRel; 16 | import pers.shezm.calcite.optimizer.reloperators.NewCsvProject; 17 | 18 | import java.util.List; 19 | 20 | public class CSVNewProjectRule extends RelOptRule { 21 | public static final CSVNewProjectRule INSTANCE = 22 | new CSVNewProjectRule(RelFactories.LOGICAL_BUILDER); 23 | 24 | //~ Constructors ----------------------------------------------------------- 25 | 26 | /** 27 | * Creates a ProjectRemoveRule. 28 | * 29 | * @param relBuilderFactory Builder for relational expressions 30 | */ 31 | public CSVNewProjectRule(RelBuilderFactory relBuilderFactory) { 32 | // Create a specialized operand to detect non-matches early. This keeps 33 | // the rule queue short. 34 | super(operandJ(Project.class, null, CSVNewProjectRule::isTrivial, any()), 35 | relBuilderFactory, null); 36 | } 37 | 38 | //~ Methods ---------------------------------------------------------------- 39 | 40 | public void onMatch(RelOptRuleCall call) { 41 | Project project = call.rel(0); 42 | assert isTrivial(project); 43 | RelNode stripped = project.getInput(); 44 | 45 | if (project instanceof CSVProject) { 46 | CSVProject csvProject = (CSVProject) project; 47 | NewCsvProject newCsvProject = new NewCsvProject( 48 | csvProject.getCluster(), 49 | RelTraitSet.createEmpty().plus(CSVRel.CONVENTION).plus(RelDistributionTraitDef.INSTANCE.getDefault()), 50 | csvProject.getInput(), 51 | csvProject.getProjects(), 52 | csvProject.getRowType() 53 | ); 54 | 55 | call.transformTo(newCsvProject); 56 | } 57 | } 58 | 59 | /** 60 | * Returns the child of a project if the project is trivial, otherwise 61 | * the project itself. 
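 * (In this demo, though, the method rebuilds the project as a NewCsvProject instead of returning its input.)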
62 | */ 63 | public static RelNode strip(Project project) { 64 | CSVProject csvProject = (CSVProject) project; 65 | RelNode input = convert(csvProject.getInput(), csvProject.getInput().getTraitSet().replace(CSVRel.CONVENTION).simplify()); 66 | return new NewCsvProject( 67 | csvProject.getCluster(), 68 | RelTraitSet.createEmpty().plus(CSVRel.CONVENTION).plus(RelDistributionTraitDef.INSTANCE.getDefault()), 69 | input, 70 | csvProject.getProjects(), 71 | csvProject.getRowType() 72 | ); 73 | } 74 | 75 | public static boolean isTrivial(Project project) { 76 | return RexUtil.isIdentity(project.getProjects(), 77 | project.getInput().getRowType()); 78 | } 79 | 80 | // @Deprecated // to be removed before 1.5 81 | // public static boolean isIdentity(List exps, 82 | // RelDataType childRowType) { 83 | // return RexUtil.isIdentity(exps, childRowType); 84 | // } 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/converter/CSVProjectConverter.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.converter; 2 | 3 | import org.apache.calcite.plan.Convention; 4 | import org.apache.calcite.plan.RelOptRuleCall; 5 | import org.apache.calcite.plan.RelTrait; 6 | import org.apache.calcite.plan.RelTraitSet; 7 | import org.apache.calcite.rel.RelDistributionTraitDef; 8 | import org.apache.calcite.rel.RelNode; 9 | import org.apache.calcite.rel.convert.ConverterRule; 10 | import org.apache.calcite.rel.logical.LogicalProject; 11 | import pers.shezm.calcite.optimizer.reloperators.CSVProject; 12 | import pers.shezm.calcite.optimizer.reloperators.CSVRel; 13 | 14 | public class CSVProjectConverter extends ConverterRule { 15 | 16 | public static final CSVProjectConverter INSTANCE = new CSVProjectConverter( 17 | LogicalProject.class, 18 | Convention.NONE, 19 | CSVRel.CONVENTION, 20 | "CSVProjectConverter" 21 | ); 22 | 23 | public CSVProjectConverter(Class clazz, RelTrait in, RelTrait out, String description) { 24 | super(clazz, in, out, description); 25 | } 26 | 27 | @Override 28 | public boolean matches(RelOptRuleCall call) { 29 | return super.matches(call); 30 | } 31 | 32 | @Override 33 | public RelNode convert(RelNode rel) { 34 | LogicalProject logicalProject = (LogicalProject) rel; 35 | RelNode input = convert(logicalProject.getInput(), logicalProject.getInput().getTraitSet().replace(CSVRel.CONVENTION).simplify()); 36 | return new CSVProject( 37 | logicalProject.getCluster(), 38 | RelTraitSet.createEmpty().plus(CSVRel.CONVENTION).plus(RelDistributionTraitDef.INSTANCE.getDefault()), 39 | input, 40 | logicalProject.getProjects(), 41 | logicalProject.getRowType() 42 | ); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/converter/CSVTableScanConverter.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.converter; 2 | 3 | import org.apache.calcite.plan.Convention; 4 | import org.apache.calcite.plan.RelOptRuleCall; 5 | import org.apache.calcite.plan.RelTrait; 6 | import org.apache.calcite.plan.RelTraitSet; 7 | import org.apache.calcite.rel.RelDistributionTraitDef; 8 | import org.apache.calcite.rel.RelNode; 9 | import org.apache.calcite.rel.convert.ConverterRule; 10 | import org.apache.calcite.rel.logical.LogicalTableScan; 11 | import pers.shezm.calcite.optimizer.reloperators.CSVRel; 12 | import 
pers.shezm.calcite.optimizer.reloperators.CSVTableScan; 13 | 14 | public class CSVTableScanConverter extends ConverterRule { 15 | 16 | public static final CSVTableScanConverter INSTANCE = new CSVTableScanConverter( 17 | LogicalTableScan.class, 18 | Convention.NONE, 19 | CSVRel.CONVENTION, 20 | "CSVTableScan" 21 | ); 22 | 23 | @Override 24 | public boolean matches(RelOptRuleCall call) { 25 | return super.matches(call); 26 | } 27 | 28 | public CSVTableScanConverter(Class clazz, RelTrait in, RelTrait out, String description) { 29 | super(clazz, in, out, description); 30 | } 31 | 32 | @Override 33 | public RelNode convert(RelNode rel) { 34 | LogicalTableScan tableScan = (LogicalTableScan) rel; 35 | return new CSVTableScan(tableScan.getCluster(), 36 | RelTraitSet.createEmpty().plus(CSVRel.CONVENTION).plus(RelDistributionTraitDef.INSTANCE.getDefault()), 37 | tableScan.getTable()); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/cost/CSVRelMdDistinctRowCount.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.cost; 2 | 3 | import com.google.common.collect.ImmutableList; 4 | import org.apache.calcite.rel.RelNode; 5 | import org.apache.calcite.rel.metadata.*; 6 | import org.apache.calcite.rex.RexNode; 7 | import org.apache.calcite.util.BuiltInMethod; 8 | import org.apache.calcite.util.ImmutableBitSet; 9 | 10 | /** 11 | * 其底层是实现了 MetadataHandler 接口,用于提供 distinctRwoCount 信息 12 | */ 13 | public class CSVRelMdDistinctRowCount extends RelMdDistinctRowCount { 14 | 15 | private static final CSVRelMdDistinctRowCount INSTANCE = 16 | new CSVRelMdDistinctRowCount(); 17 | 18 | public static final RelMetadataProvider SOURCE = ChainedRelMetadataProvider 19 | .of(ImmutableList.of( 20 | 21 | ReflectiveRelMetadataProvider.reflectiveSource( 22 | BuiltInMethod.DISTINCT_ROW_COUNT.method, INSTANCE), 23 | 24 | ReflectiveRelMetadataProvider.reflectiveSource( 25 | BuiltInMethod.CUMULATIVE_COST.method, INSTANCE))); 26 | 27 | @Override 28 | public Double getDistinctRowCount(RelNode rel, RelMetadataQuery mq, ImmutableBitSet groupKey, 29 | RexNode predicate) { 30 | 31 | // return getDistinctRowCount( rel, mq, groupKey, predicate); 32 | 33 | /* 34 | * For now use Calcite' default formulas for propagating NDVs up the Query 35 | * Tree. 
36 | */ 37 | return super.getDistinctRowCount(rel, mq, groupKey, predicate); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/cost/CSVRelMdRowCount.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.cost; 2 | 3 | import org.apache.calcite.plan.RelOptUtil; 4 | import org.apache.calcite.rel.RelNode; 5 | import org.apache.calcite.rel.core.Filter; 6 | import org.apache.calcite.rel.core.Project; 7 | import org.apache.calcite.rel.metadata.*; 8 | import org.apache.calcite.rex.RexNode; 9 | import org.apache.calcite.util.BuiltInMethod; 10 | import pers.shezm.calcite.optimizer.reloperators.CSVProject; 11 | import pers.shezm.calcite.optimizer.reloperators.CSVTableScan; 12 | import pers.shezm.calcite.optimizer.reloperators.NewCsvProject; 13 | 14 | public class CSVRelMdRowCount extends RelMdRowCount { 15 | public MetadataDef getDef() { 16 | return BuiltInMetadata.RowCount.DEF; 17 | } 18 | 19 | public static final RelMetadataProvider SOURCE = ReflectiveRelMetadataProvider 20 | .reflectiveSource(BuiltInMethod.ROW_COUNT.method, new CSVRelMdRowCount()); 21 | 22 | @Override 23 | public Double getRowCount(RelNode rel, RelMetadataQuery mq) { 24 | return 1.0; 25 | } 26 | 27 | //以下两个方法用于测试 CBO 优化 28 | public Double getRowCount(NewCsvProject rel, RelMetadataQuery mq) { 29 | return 1.0; 30 | } 31 | 32 | public Double getRowCount(CSVProject rel, RelMetadataQuery mq) { 33 | return 1.0; 34 | } 35 | 36 | public Double getRowCount(Project rel, RelMetadataQuery mq) { 37 | return 2.0; 38 | } 39 | 40 | //实现自己的 cost 计算逻辑 41 | @Override 42 | public Double getRowCount(Filter rel, RelMetadataQuery mq) { 43 | return mq.getRowCount(rel.getInput()) / 5; 44 | // return 5.0; 45 | } 46 | 47 | 48 | public Double getRowCount(RelNode rel) { 49 | return 1.0; 50 | } 51 | 52 | public Double getRowCount(RelNode rel,RexNode rex,RelMetadataQuery mq) { 53 | return 1.0; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/cost/DefaultRelMetadataProvider.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.cost; 2 | 3 | import com.google.common.collect.ImmutableList; 4 | import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; 5 | import org.apache.calcite.rel.metadata.RelMetadataProvider; 6 | 7 | public class DefaultRelMetadataProvider { 8 | 9 | public RelMetadataProvider getMetadataProvider() { 10 | // Return MD provider 11 | return ChainedRelMetadataProvider.of(ImmutableList 12 | .of( 13 | CSVRelMdRowCount.SOURCE, 14 | CSVRelMdDistinctRowCount.SOURCE, 15 | org.apache.calcite.rel.metadata.DefaultRelMetadataProvider.INSTANCE 16 | ) 17 | ); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/reloperators/CSVFilter.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.reloperators; 2 | 3 | import org.apache.calcite.plan.RelOptCluster; 4 | import org.apache.calcite.plan.RelOptCost; 5 | import org.apache.calcite.plan.RelOptPlanner; 6 | import org.apache.calcite.plan.RelTraitSet; 7 | import org.apache.calcite.rel.RelNode; 8 | import org.apache.calcite.rel.core.Filter; 9 | import org.apache.calcite.rel.metadata.RelMetadataQuery; 10 | import 
org.apache.calcite.rex.RexNode; 11 | 12 | public class CSVFilter extends Filter implements CSVRel { 13 | private RelOptCost cost; 14 | 15 | public CSVFilter(RelOptCluster cluster, RelTraitSet traits, RelNode child, RexNode condition) { 16 | super(cluster, traits, child, condition); 17 | } 18 | 19 | @Override 20 | public Filter copy(RelTraitSet traitSet, RelNode input, RexNode condition) { 21 | return new CSVFilter(getCluster(),this.traitSet,input,condition); 22 | } 23 | 24 | 25 | @Override 26 | public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { 27 | RelNode input = this.input; 28 | 29 | // RelOptCost inputCost; 30 | // if (input instanceof RelSubset) { 31 | // inputCost = ((RelSubset) input). 32 | // } 33 | // 34 | // RelOptCost inputCost = mq.getCumulativeCost(this.input); 35 | 36 | 37 | //return mq.getCumulativeCost(this); 38 | //return VolcanoCost.FACTORY.makeZeroCost(); 39 | double dRows = mq.getRowCount(this); 40 | // double dCpu = mq.getRowCount(getInput()); 41 | double dCpu = dRows; 42 | 43 | double dIo = 0; 44 | // return planner.getCostFactory().makeCost(dRows, dCpu, dIo); 45 | return planner.getCostFactory().makeCost(2, 2, 2); 46 | 47 | 48 | } 49 | 50 | @Override 51 | public double estimateRowCount(RelMetadataQuery mq) { 52 | return 12; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/reloperators/CSVProject.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.reloperators; 2 | 3 | import org.apache.calcite.plan.RelOptCluster; 4 | import org.apache.calcite.plan.RelOptCost; 5 | import org.apache.calcite.plan.RelOptPlanner; 6 | import org.apache.calcite.plan.RelTraitSet; 7 | import org.apache.calcite.rel.RelNode; 8 | import org.apache.calcite.rel.core.Project; 9 | import org.apache.calcite.rel.metadata.RelMetadataQuery; 10 | import org.apache.calcite.rel.type.RelDataType; 11 | import org.apache.calcite.rex.RexNode; 12 | 13 | import java.util.List; 14 | 15 | public class CSVProject extends Project implements CSVRel { 16 | private RelOptCost cost; 17 | 18 | public CSVProject(RelOptCluster cluster, RelTraitSet traits, RelNode input, List projects, RelDataType rowType) { 19 | super(cluster, traits, input, projects, rowType); 20 | } 21 | 22 | @Override 23 | public Project copy(RelTraitSet traitSet, RelNode input, List projects, RelDataType rowType) { 24 | return new CSVProject(getCluster(),traitSet,input,projects,rowType); 25 | } 26 | 27 | @Override 28 | public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { 29 | double dRows = mq.getRowCount(getInput()); 30 | 31 | double dCpu = dRows * exps.size(); 32 | double dIo = 0; 33 | return planner.getCostFactory().makeCost(30, 30, 30); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/reloperators/CSVRel.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.reloperators; 2 | 3 | import org.apache.calcite.plan.Convention; 4 | import org.apache.calcite.rel.RelNode; 5 | 6 | public interface CSVRel extends RelNode { 7 | Convention CONVENTION = new Convention.Impl("CSV", CSVRel.class); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/reloperators/CSVTableScan.java: 
-------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.reloperators; 2 | 3 | import org.apache.calcite.plan.*; 4 | import org.apache.calcite.rel.core.TableScan; 5 | import org.apache.calcite.rel.metadata.RelMetadataQuery; 6 | 7 | public class CSVTableScan extends TableScan implements CSVRel { 8 | private RelOptCost cost; 9 | /** 10 | * RelOptCluster:palnner 运行时的环境,保存上下文信息 11 | * RelTrait:用来定义逻辑表的物理相关属性(physical property),三种主要的 trait 类型是:Convention、RelCollation、RelDistribution; 12 | * RelOpt:代表关系表 13 | */ 14 | public CSVTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptTable table) { 15 | super(cluster, traitSet, table); 16 | } 17 | 18 | @Override public double estimateRowCount(RelMetadataQuery mq) { 19 | return 50; 20 | } 21 | 22 | @Override 23 | public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { 24 | //return super.computeSelfCo(planner, mq); 25 | 26 | if (cost != null) { 27 | return cost; 28 | } 29 | //通过工厂生成 RelOptCost ,注入自定义 cost 值并返回 30 | cost = planner.getCostFactory().makeCost(1, 1, 0); 31 | return cost; 32 | } 33 | 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/optimizer/reloperators/NewCsvProject.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.optimizer.reloperators; 2 | 3 | import org.apache.calcite.plan.RelOptCluster; 4 | import org.apache.calcite.plan.RelOptCost; 5 | import org.apache.calcite.plan.RelOptPlanner; 6 | import org.apache.calcite.plan.RelTraitSet; 7 | import org.apache.calcite.rel.RelNode; 8 | import org.apache.calcite.rel.core.Project; 9 | import org.apache.calcite.rel.metadata.RelMetadataQuery; 10 | import org.apache.calcite.rel.type.RelDataType; 11 | import org.apache.calcite.rex.RexNode; 12 | 13 | import java.util.List; 14 | 15 | public class NewCsvProject extends Project implements CSVRel { 16 | private RelOptCost cost; 17 | 18 | public NewCsvProject(RelOptCluster cluster, RelTraitSet traits, RelNode input, List projects, RelDataType rowType) { 19 | super(cluster, traits, input, projects, rowType); 20 | } 21 | 22 | @Override 23 | public Project copy(RelTraitSet traitSet, RelNode input, List projects, RelDataType rowType) { 24 | return new NewCsvProject(getCluster(),traitSet,input,projects,rowType); 25 | } 26 | 27 | @Override 28 | public RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { 29 | double dRows = mq.getRowCount(getInput()); 30 | 31 | double dCpu = dRows * exps.size(); 32 | double dIo = 0; 33 | // return planner.getCostFactory().makeCost(dRows, dCpu, dIo); 34 | //返回不同的Cost,CBO的结果不一样 35 | return planner.getCostFactory().makeCost(10, 10, 0); 36 | // return planner.getCostFactory().makeCost(40, 40, 0); 37 | 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/test/Test1.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.test; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.serializer.SerializerFeature; 5 | import pers.shezm.calcite.utils.CalciteUtil; 6 | 7 | import java.sql.*; 8 | 9 | /** 10 | * 使用自定义的 csv 源,查询数据 11 | */ 12 | public class Test1 { 13 | public static void main(String[] args) { 14 | String filePath = "/model.json"; 15 | Connection connection = null; 16 | Statement statement = null; 17 | try { 
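// model.json on the classpath defines the TEST_CSV schema, whose tables are built from TEST01.csv and TEST02.csv by CsvSchemaFactory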
18 | connection = CalciteUtil.getConnect(filePath); 19 | statement = connection.createStatement(); 20 | String[] strArray = { 21 | "select * from TEST_CSV.TEST02", 22 | "select NAME3,count(*) as num from TEST_CSV.TEST02 group by NAME3", 23 | "select * from TEST_CSV.TEST01 as t1 left join TEST_CSV.TEST02 as t2 on t1.NAME1=t2.NAME3" 24 | }; 25 | 26 | for (String sql : strArray) { 27 | 28 | ResultSet resultSet = statement.executeQuery(sql); 29 | System.out.println("------------------------- " + 30 | "start sql" 31 | + " ------------------------- "); 32 | String pretty = JSON.toJSONString(CalciteUtil.getData(resultSet), 33 | SerializerFeature.PrettyFormat, 34 | SerializerFeature.WriteMapNullValue, 35 | SerializerFeature.WriteDateUseDateFormat); 36 | System.out.println(pretty); 37 | System.out.println("------------------------- " + 38 | "end sql" 39 | + " ------------------------- "); 40 | } 41 | 42 | } catch (Exception e) { 43 | e.printStackTrace(); 44 | }finally { 45 | try { 46 | statement.close(); 47 | connection.close(); 48 | }catch (SQLException e){ 49 | e.printStackTrace(); 50 | } 51 | } 52 | } 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/test/Test2.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.test; 2 | 3 | import org.apache.calcite.plan.ConventionTraitDef; 4 | import org.apache.calcite.rel.RelDistributionTraitDef; 5 | import pers.shezm.calcite.utils.CalciteUtil; 6 | import org.apache.calcite.config.Lex; 7 | import org.apache.calcite.jdbc.CalcitePrepare; 8 | import org.apache.calcite.plan.RelOptTable; 9 | import org.apache.calcite.rel.RelNode; 10 | import org.apache.calcite.rel.RelRoot; 11 | import org.apache.calcite.server.CalciteServerStatement; 12 | import org.apache.calcite.sql.SqlNode; 13 | import org.apache.calcite.sql.parser.SqlParser; 14 | import org.apache.calcite.tools.FrameworkConfig; 15 | import org.apache.calcite.tools.Frameworks; 16 | import org.apache.calcite.tools.Planner; 17 | 18 | import java.sql.Connection; 19 | import java.sql.SQLException; 20 | import java.util.LinkedList; 21 | import java.util.Queue; 22 | 23 | /** 24 | * 使用自定义的 csv 源,解析校验 SQL ,遍历 RelNode 树,统计其中一些信息并打印 25 | */ 26 | public class Test2 { 27 | 28 | public static void main(String[] args) { 29 | String sql = "select * from TEST_CSV.TEST01 as t1 left join TEST_CSV.TEST02 as t2 on t1.NAME1=t2.NAME3"; 30 | String filePath = "/model.json"; 31 | Connection connection = null; 32 | try { 33 | connection = CalciteUtil.getConnect(filePath); 34 | RelRoot root = Test2.genRelRoot(connection, sql); 35 | Test2.printSqlInfo(root.rel); 36 | } catch (Exception e) { 37 | e.printStackTrace(); 38 | } finally { 39 | try { 40 | connection.close(); 41 | }catch (SQLException e){ 42 | e.printStackTrace(); 43 | } 44 | } 45 | } 46 | 47 | 48 | /** 49 | * Planner解析,校验,然后生成RelNode,使用mysql的sql语法格式 50 | * 51 | * @param connection 52 | * @param sql 53 | * @return 54 | * @throws Exception 参考自:https://zhuanlan.zhihu.com/p/65345335 55 | */ 56 | public static RelRoot genRelRoot(Connection connection, String sql) throws Exception { 57 | //从 conn 中获取相关的环境和配置,生成对应配置 58 | CalciteServerStatement st = connection.createStatement().unwrap(CalciteServerStatement.class); 59 | CalcitePrepare.Context prepareContext = st.createPrepareContext(); 60 | final FrameworkConfig config = Frameworks.newConfigBuilder() 61 | .parserConfig(SqlParser.configBuilder().setLex(Lex.MYSQL).build()) 62 | 
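// reuse the root schema of the existing connection so the TEST_CSV tables defined in model.json are visible to the planner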
.defaultSchema(prepareContext.getRootSchema().plus()) 63 | // .traitDefs(ConventionTraitDef.INSTANCE, RelDistributionTraitDef.INSTANCE) 64 | .build(); 65 | Planner planner = Frameworks.getPlanner(config); 66 | RelRoot root = null; 67 | try { 68 | SqlNode parse1 = planner.parse(sql); 69 | SqlNode validate = planner.validate(parse1); 70 | root = planner.rel(validate); 71 | RelNode rel = root.rel; 72 | } catch (Exception e) { 73 | e.printStackTrace(); 74 | } 75 | 76 | return root; 77 | } 78 | 79 | /** 80 | * 遍历 RelNode 并打印一些节点统计信息 81 | * 82 | * @param rel 83 | */ 84 | public static void printSqlInfo(RelNode rel) { 85 | Queue relNodeQueue = new LinkedList(); 86 | relNodeQueue.offer(rel); 87 | /** 88 | * RelNode类型: 89 | * TableScan(获取表信息,列信息) 90 | * 一元节点: 91 | * LogicalJoin 92 | * Sort 93 | * GROUP BY 94 | * ...... 95 | * 96 | * 二元信息: 97 | * LogicalJoin 98 | * Union 99 | * ...... 100 | * 101 | */ 102 | int joinCount = 0; 103 | int aggregateCount = 0; 104 | //层次遍历树并获取信息 105 | while (relNodeQueue.size() != 0) { 106 | int inputNum = relNodeQueue.size(); 107 | for (int i = 0; i < inputNum; i++) { 108 | RelNode tem = relNodeQueue.poll(); 109 | for (RelNode r : tem.getInputs()) { 110 | relNodeQueue.offer(r); 111 | } 112 | if (tem.getRelTypeName().contains("Join")) { 113 | joinCount += 1; 114 | } 115 | if (tem.getRelTypeName().contains("Aggregate")) { 116 | aggregateCount += 1; 117 | } 118 | //print table info 119 | if (tem.getTable() != null) { 120 | RelOptTable rtable = tem.getTable(); 121 | System.out.println("------------------ table " + rtable.getQualifiedName() + " scan info: ------------------"); 122 | System.out.println("row name and type : " + rtable.getRowType()); 123 | System.out.println("distribution info : " + rtable.getDistribution()); //由 RelDistribution 的类型决定 124 | System.out.println("columns strategies : " + rtable.getColumnStrategies()); 125 | System.out.println("------------------end table " + rtable.getQualifiedName() + " ------------------"); 126 | } 127 | // RelMetadataQuery mq = rel.getCluster().getMetadataQuery(); 128 | } 129 | } 130 | //print sql info 131 | System.out.println("Join num is : " + joinCount); 132 | System.out.println("Aggregate num is : " + joinCount); 133 | 134 | // System.out.println("After------------------"); 135 | 136 | } 137 | 138 | } 139 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/test/Test3.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.test; 2 | 3 | import org.apache.calcite.plan.RelOptUtil; 4 | import org.apache.calcite.plan.hep.HepPlanner; 5 | import org.apache.calcite.plan.hep.HepProgramBuilder; 6 | import org.apache.calcite.rel.RelNode; 7 | import org.apache.calcite.rel.RelRoot; 8 | import org.apache.calcite.rel.rules.FilterJoinRule; 9 | import org.apache.calcite.sql.SqlExplainLevel; 10 | import pers.shezm.calcite.utils.CalciteUtil; 11 | 12 | import java.sql.Connection; 13 | import java.sql.SQLException; 14 | 15 | /** 16 | * 打印 SQL 的 Cost 信息,添加 Calcite 的优化器后,RelNode 的信息 17 | */ 18 | public class Test3 { 19 | public static void main(String[] args) { 20 | String sql = "select * from TEST_CSV.TEST01 as t1 left join TEST_CSV.TEST02 as t2 " + 21 | "on t1.NAME1=t2.NAME3 " + 22 | "where t1.NAME1='hello'"; 23 | String filePath = "/model.json"; 24 | Connection connection = null; 25 | try { 26 | connection = CalciteUtil.getConnect(filePath); 27 | RelRoot root = Test2.genRelRoot(connection, sql); 28 | 
Test3.optimize(root.rel);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Builds an optimizer, adds the FILTER_ON_JOIN rule, runs the optimization, and prints the plan
     * before and after, together with the corresponding cost information.
     * In Calcite the cost of an upper node in the tree is computed from the costs of the nodes below it,
     * so the cost of the TableScan at the bottom matters most.
     * By default the row count is 100 and the CPU cost is 101;
     * see TableScan.computeSelfCost(xxx) -> RelOptAbstractTable.getRowCount() in the Calcite source.
     * @param rel
     * @return
     */
    public static RelNode optimize(RelNode rel){
        System.out.println("----------------- before optimizer ------------------");
        System.out.println(RelOptUtil.toString(rel, SqlExplainLevel.ALL_ATTRIBUTES));
        HepProgramBuilder builder = new HepProgramBuilder();
        builder.addRuleInstance(FilterJoinRule.FilterIntoJoinRule.FILTER_ON_JOIN); //note: add the rule
        HepPlanner hepPlanner = new HepPlanner(builder.build()); //you can also implement your own rule, modelled on FilterIntoJoinRule
        hepPlanner.setRoot(rel);
        rel = hepPlanner.findBestExp();
        System.out.println("----------------- after optimizer ------------------");

        System.out.println(RelOptUtil.toString(rel, SqlExplainLevel.ALL_ATTRIBUTES));

        return rel;
    }
}
--------------------------------------------------------------------------------
/src/main/java/pers/shezm/calcite/test/Test4.java:
--------------------------------------------------------------------------------
package pers.shezm.calcite.test;

import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelRoot;
import org.apache.calcite.sql.SqlExplainLevel;
import pers.shezm.calcite.optimizer.converter.CSVFilterConverter;
import pers.shezm.calcite.optimizer.converter.CSVNewProjectConverter;
import pers.shezm.calcite.optimizer.converter.CSVProjectConverter;
import pers.shezm.calcite.optimizer.converter.CSVTableScanConverter;
import pers.shezm.calcite.utils.CalciteUtil;

import java.sql.Connection;
import java.sql.SQLException;

/**
 * Implementing an optimizer usually requires cost information (row count, CPU, IO, and so on), and Calcite's
 * defaults are fairly crude. Some systems also maintain their own metadata (Hive, for example), so the cost
 * calculation of the underlying RelNodes has to be changed in order to inject that metadata.
 * There are two ways to do this; this class shows the first one: converting directly to custom RelNodes.
 */
public class Test4 {
    public static void main(String[] args) {
        String sql = "select * from TEST_CSV.TEST01 as t1 left join TEST_CSV.TEST02 as t2 " +
                "on t1.NAME1=t2.NAME3 " +
                "where t1.NAME1='hello'";
        sql = "select * from TEST_CSV.TEST01 where TEST01.NAME1='hello'";

        String filePath = "/model.json";
        Connection connection = null;
        try {
            connection = CalciteUtil.getConnect(filePath);
            RelRoot root = Test2.genRelRoot(connection, sql);
            Test4.optimize(root.rel);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /** Execution order of Calcite's RBO (HepPlanner):
     * rules are taken in the order they were added (there are three converters here), and applyRules is invoked for each rule.
     *
     * applyRules walks the whole Rel tree (each node is wrapped in a HepRelVertex) and tries to match every node,
     * top-down. Each match produces a HepRuleCall, and the corresponding transform is then applied.
     * @param rel
     * @return
     */
    public static RelNode optimize(RelNode rel){
        System.out.println("----------------- before optimizer 
------------------"); 56 | System.out.println(RelOptUtil.toString(rel, SqlExplainLevel.ALL_ATTRIBUTES)); 57 | 58 | HepProgramBuilder builder = new HepProgramBuilder(); 59 | builder.addRuleInstance(CSVTableScanConverter.INSTANCE); 60 | builder.addRuleInstance(CSVFilterConverter.INSTANCE); 61 | builder.addRuleInstance(CSVProjectConverter.INSTANCE); 62 | 63 | HepPlanner hepPlanner = new HepPlanner(builder.build()); //同时也可以仿照 FilterIntoJoinRule 这个类实现自己的优化 rule 64 | hepPlanner.setRoot(rel); 65 | rel = hepPlanner.findBestExp(); 66 | System.out.println("----------------- after optimizer ------------------"); 67 | 68 | System.out.println(RelOptUtil.toString(rel, SqlExplainLevel.ALL_ATTRIBUTES)); 69 | 70 | return rel; 71 | } 72 | 73 | 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/test/Test5.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.test; 2 | 3 | import com.google.common.collect.ImmutableList; 4 | import com.google.common.collect.Lists; 5 | import org.apache.calcite.plan.RelOptPlanner; 6 | import org.apache.calcite.plan.RelOptRule; 7 | import org.apache.calcite.plan.RelOptUtil; 8 | import org.apache.calcite.plan.hep.HepMatchOrder; 9 | import org.apache.calcite.plan.hep.HepPlanner; 10 | import org.apache.calcite.plan.hep.HepProgramBuilder; 11 | import org.apache.calcite.rel.RelNode; 12 | import org.apache.calcite.rel.RelRoot; 13 | import org.apache.calcite.rel.core.Filter; 14 | import org.apache.calcite.rel.metadata.*; 15 | import org.apache.calcite.sql.SqlExplainLevel; 16 | import pers.shezm.calcite.optimizer.converter.CSVTableScanConverter; 17 | import pers.shezm.calcite.optimizer.cost.DefaultRelMetadataProvider; 18 | import pers.shezm.calcite.utils.CalciteUtil; 19 | 20 | import java.sql.Connection; 21 | import java.sql.SQLException; 22 | import java.util.List; 23 | 24 | /** 25 | * 通过 MetadataProvider 的方式,并实现相关的 MetadataHandler,最终实现自己计算 cost 的逻辑 26 | * 不过需要将 RelNode 转换成自己实现的 RelNode,才能实现注入(比如CSVTableScan) 27 | */ 28 | public class Test5 { 29 | public static void main(String[] args) { 30 | String sql = "select * from TEST_CSV.TEST01 where TEST01.NAME1='hello'"; 31 | 32 | String filePath = "/model.json"; 33 | Connection connection = null; 34 | try { 35 | connection = CalciteUtil.getConnect(filePath); 36 | RelRoot root = Test2.genRelRoot(connection, sql); 37 | System.out.println("----------------- before optimizer ------------------"); 38 | System.out.println(RelOptUtil.toString(root.rel, SqlExplainLevel.ALL_ATTRIBUTES)); 39 | 40 | DefaultRelMetadataProvider defaultRelMetadataProvider = new DefaultRelMetadataProvider(); 41 | defaultRelMetadataProvider.getMetadataProvider(); 42 | RelNode rel = Test5.hepPlan(root.rel,false,defaultRelMetadataProvider.getMetadataProvider(),null,null, CSVTableScanConverter.INSTANCE); 43 | 44 | System.out.println("----------------- after optimizer ------------------"); 45 | /**这里修改了 TableScan 到 Filter 的 rowcount 的计算逻辑, 46 | * 详见 {@link pers.shezm.calcite.optimizer.cost.CSVRelMdRowCount#getRowCount(Filter rel, RelMetadataQuery mq) }*/ 47 | System.out.println(RelOptUtil.toString(rel, SqlExplainLevel.ALL_ATTRIBUTES)); 48 | 49 | } catch (Exception e) { 50 | e.printStackTrace(); 51 | } finally { 52 | try { 53 | connection.close(); 54 | }catch (SQLException e){ 55 | e.printStackTrace(); 56 | } 57 | } 58 | } 59 | 60 | 61 | /** 62 | * Run the HEP Planner with the given rule set. 
63 | * 64 | * @param basePlan 65 | * @param followPlanChanges 66 | * @param mdProvider 67 | * @param executorProvider 68 | * @param order 69 | * @param rules 70 | * @return optimized RelNode 71 | */ 72 | public static RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, 73 | RelMetadataProvider mdProvider, RelOptPlanner.Executor executorProvider, HepMatchOrder order, 74 | RelOptRule... rules) { 75 | 76 | RelNode optimizedRelNode = basePlan; 77 | HepProgramBuilder programBuilder = new HepProgramBuilder(); 78 | if (followPlanChanges) { 79 | programBuilder.addMatchOrder(order); 80 | programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules)); 81 | } else { 82 | // TODO: Should this be also TOP_DOWN? 83 | for (RelOptRule r : rules) 84 | programBuilder.addRuleInstance(r); 85 | } 86 | 87 | // Create planner and copy context 88 | HepPlanner planner = new HepPlanner(programBuilder.build(), 89 | basePlan.getCluster().getPlanner().getContext()); 90 | 91 | List list = Lists.newArrayList(); 92 | list.add(mdProvider); 93 | planner.registerMetadataProviders(list); 94 | RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); 95 | basePlan.getCluster().setMetadataProvider( 96 | new CachingRelMetadataProvider(chainedProvider, planner)); 97 | 98 | if (executorProvider != null) { 99 | basePlan.getCluster().getPlanner().setExecutor(executorProvider); 100 | } 101 | planner.setRoot(basePlan); 102 | optimizedRelNode = planner.findBestExp(); 103 | 104 | return optimizedRelNode; 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/test/Test6.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.test; 2 | 3 | import com.google.common.collect.Lists; 4 | import org.apache.calcite.config.Lex; 5 | import org.apache.calcite.jdbc.CalcitePrepare; 6 | import org.apache.calcite.plan.*; 7 | import org.apache.calcite.plan.volcano.VolcanoPlanner; 8 | import org.apache.calcite.rel.RelNode; 9 | import org.apache.calcite.rel.RelRoot; 10 | import org.apache.calcite.rel.core.Filter; 11 | import org.apache.calcite.rel.core.Join; 12 | import org.apache.calcite.rel.core.RelFactories; 13 | import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; 14 | import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; 15 | import org.apache.calcite.rel.metadata.RelMetadataProvider; 16 | import org.apache.calcite.rel.rules.FilterJoinRule; 17 | import org.apache.calcite.rel.type.RelDataTypeFactory; 18 | import org.apache.calcite.rex.RexBuilder; 19 | import org.apache.calcite.server.CalciteServerStatement; 20 | import org.apache.calcite.sql.SqlExplainLevel; 21 | import org.apache.calcite.sql.SqlNode; 22 | import org.apache.calcite.sql.parser.SqlParser; 23 | import org.apache.calcite.sql.type.SqlTypeFactoryImpl; 24 | import org.apache.calcite.sql2rel.RelDecorrelator; 25 | import org.apache.calcite.tools.*; 26 | import pers.shezm.calcite.optimizer.converter.*; 27 | import pers.shezm.calcite.optimizer.cost.DefaultRelMetadataProvider; 28 | import pers.shezm.calcite.utils.CalciteUtil; 29 | 30 | import java.sql.Connection; 31 | import java.sql.SQLException; 32 | import java.util.List; 33 | 34 | import static org.apache.calcite.rel.core.RelFactories.DEFAULT_STRUCT; 35 | 36 | /** 37 | * 对比 RBO 和 CBO,在 Calcite 中,对应的是 HepPlanner 和 VolcanoPlanner。 38 | * 遇到多个可匹配 rule 的时候,HepPlanner 会按照顺序进行匹配生成 RelNode。而 VolcanoPlanner 会根据最终的 Cost 生成 RelNode。 39 | */ 
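// The CBO comparison below hinges on the hard-coded costs: CSVProject.computeSelfCost() returns makeCost(30, 30, 30)
// while NewCsvProject.computeSelfCost() returns makeCost(10, 10, 0), so the CSVNewProjectRule rewrite to NewCsvProject
// wins on cumulative cost; raising NewCsvProject's cost above CSVProject's would keep the original CSVProject plan.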
40 | public class Test6 { 41 | public static void main(String[] args) { 42 | String sql = "select * from TEST_CSV.TEST01 where TEST01.NAME1='hello'"; 43 | 44 | String filePath = "/model.json"; 45 | Connection connection = null; 46 | try { 47 | connection = CalciteUtil.getConnect(filePath); 48 | RelRoot root = Test2.genRelRoot(connection, sql); 49 | System.out.println("----------------- before optimizer ------------------"); 50 | System.out.println(RelOptUtil.toString(root.rel, SqlExplainLevel.ALL_ATTRIBUTES)); 51 | 52 | DefaultRelMetadataProvider defaultRelMetadataProvider = new DefaultRelMetadataProvider(); 53 | defaultRelMetadataProvider.getMetadataProvider(); 54 | RelNode rel = Test5.hepPlan(root.rel, 55 | false, 56 | defaultRelMetadataProvider.getMetadataProvider(), 57 | null,null, 58 | CSVTableScanConverter.INSTANCE, 59 | CSVFilterConverter.INSTANCE, 60 | CSVProjectConverter.INSTANCE, 61 | CSVNewProjectConverter.INSTANCE); 62 | 63 | System.out.println("----------------- after RBO optimizer 1------------------"); 64 | System.out.println(RelOptUtil.toString(rel, SqlExplainLevel.ALL_ATTRIBUTES)); 65 | 66 | //将最后两个 rule 改变一下顺序,会发现结果的顺序也改变了,说明 RBO 只会简单得遍历 rule 然后应用 67 | RelNode rel1 = Test5.hepPlan(root.rel, 68 | false, 69 | defaultRelMetadataProvider.getMetadataProvider(), 70 | null,null, 71 | CSVTableScanConverter.INSTANCE, 72 | CSVFilterConverter.INSTANCE, 73 | CSVNewProjectConverter.INSTANCE, 74 | CSVProjectConverter.INSTANCE); 75 | System.out.println("----------------- after RBO optimizer 2------------------"); 76 | System.out.println(RelOptUtil.toString(rel1, SqlExplainLevel.ALL_ATTRIBUTES)); 77 | 78 | //这里的 rule 是替换 CsvProject 为 NewCsvProject,是否替换会根据 cumulative cost 的信息,谁的小就替换谁的 79 | //我直接在对应的 rel 里面写死了返回的 cost 信息(rows:10,cpu:10,io:0),如果调高一点(高过 CsvProject 的定义),那么是不会替换的 80 | rel = CBOOptimizer(rel, 81 | CSVNewProjectRule.INSTANCE 82 | ); 83 | System.out.println("----------------- after CBO optimizer ------------------"); 84 | System.out.println(RelOptUtil.toString(rel, SqlExplainLevel.ALL_ATTRIBUTES)); 85 | 86 | 87 | } catch (Exception e) { 88 | e.printStackTrace(); 89 | } finally { 90 | try { 91 | connection.close(); 92 | }catch (SQLException e){ 93 | e.printStackTrace(); 94 | } 95 | } 96 | } 97 | 98 | 99 | public static class FilterIntoJoinRule extends FilterJoinRule { 100 | public FilterIntoJoinRule(boolean smart, 101 | RelBuilderFactory relBuilderFactory, Predicate predicate) { 102 | super( 103 | operand(Filter.class, 104 | operand(Join.class, RelOptRule.any())), 105 | "FilterJoinRule:filter", smart, relBuilderFactory, 106 | predicate); 107 | } 108 | 109 | @Deprecated // to be removed before 2.0 110 | public FilterIntoJoinRule(boolean smart, 111 | RelFactories.FilterFactory filterFactory, 112 | RelFactories.ProjectFactory projectFactory, 113 | Predicate predicate) { 114 | this(smart, RelBuilder.proto(filterFactory, projectFactory), predicate); 115 | } 116 | 117 | @Override public void onMatch(RelOptRuleCall call) { 118 | Filter filter = call.rel(0); 119 | Join join = call.rel(1); 120 | perform(call, filter, join); 121 | } 122 | } 123 | 124 | public static RelNode CBOOptimizer(RelNode rel, RelOptRule... 
rules){ 125 | // rel.getCluster() 126 | VolcanoPlanner planner = (VolcanoPlanner) rel.getCluster().getPlanner(); 127 | //VolcanoPlanner 默认带有很多的优化 rule,其中有一个 ProjectRemoveRule 会消除掉 Project,故先 clear 128 | planner.clear(); 129 | planner.addRelTraitDef(ConventionTraitDef.INSTANCE); 130 | 131 | //由于火山模型用的 RelMetadataProvider 是 VolcanoRelMetadataProvider ,所以哪怕这里注入了我们自定义的 RelMetadataProvider 也不会生效 132 | // List list = Lists.newArrayList(); 133 | // DefaultRelMetadataProvider mdProvider = new DefaultRelMetadataProvider(); 134 | // list.add(mdProvider.getMetadataProvider()); 135 | // planner.registerMetadataProviders(list); 136 | // RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); 137 | // rel.getCluster().setMetadataProvider( 138 | // new CachingRelMetadataProvider(chainedProvider, planner)); 139 | 140 | for (RelOptRule r : rules) 141 | planner.addRule(r); 142 | 143 | RelOptCluster cluster = newCluster(planner); 144 | 145 | cluster.getPlanner().setRoot(rel); 146 | RelNode result = planner.chooseDelegate().findBestExp(); 147 | return result; 148 | } 149 | 150 | static RelOptCluster newCluster(VolcanoPlanner planner) { 151 | final RelDataTypeFactory typeFactory = 152 | new SqlTypeFactoryImpl(org.apache.calcite.rel.type.RelDataTypeSystem.DEFAULT); 153 | return RelOptCluster.create(planner, new RexBuilder(typeFactory)); 154 | } 155 | 156 | public static RelRoot genRelRootWithVolcanoPlanner(Connection connection, String sql) throws Exception { 157 | //从 conn 中获取相关的环境和配置,生成对应配置 158 | CalciteServerStatement st = connection.createStatement().unwrap(CalciteServerStatement.class); 159 | CalcitePrepare.Context prepareContext = st.createPrepareContext(); 160 | final FrameworkConfig config = Frameworks.newConfigBuilder() 161 | .parserConfig(SqlParser.configBuilder().setLex(Lex.MYSQL).build()) 162 | .defaultSchema(prepareContext.getRootSchema().plus()) 163 | // .traitDefs(ConventionTraitDef.INSTANCE, RelDistributionTraitDef.INSTANCE) 164 | .build(); 165 | Planner planner = Frameworks.getPlanner(config); 166 | RelRoot root = null; 167 | 168 | //添加 VolcanoPlanner 优化器 169 | VolcanoPlanner volcanoPlanner = new VolcanoPlanner(); 170 | RelOptCluster cluster = newCluster(volcanoPlanner); 171 | RelBuilderFactory LOGICAL_BUILDER = 172 | RelBuilder.proto(Contexts.of(DEFAULT_STRUCT)); 173 | final RelBuilder relBuilder = LOGICAL_BUILDER.create(cluster, null); 174 | 175 | try { 176 | SqlNode parse1 = planner.parse(sql); 177 | SqlNode validate = planner.validate(parse1); 178 | root = planner.rel(validate); 179 | RelNode rel = root.rel; 180 | } catch (Exception e) { 181 | e.printStackTrace(); 182 | } 183 | root = root.withRel(RelDecorrelator.decorrelateQuery(root.rel, relBuilder)); 184 | return root; 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/main/java/pers/shezm/calcite/utils/CalciteUtil.java: -------------------------------------------------------------------------------- 1 | package pers.shezm.calcite.utils; 2 | 3 | import com.google.common.collect.Lists; 4 | import com.google.common.collect.Maps; 5 | 6 | import java.net.URL; 7 | import java.net.URLDecoder; 8 | import java.sql.Connection; 9 | import java.sql.DriverManager; 10 | import java.sql.ResultSet; 11 | import java.sql.ResultSetMetaData; 12 | import java.util.List; 13 | import java.util.Map; 14 | import java.util.Properties; 15 | 16 | public class CalciteUtil { 17 | /** 18 | * 根据给定的 model.json 文件获取 Connection 19 | * 20 | * @param filePath 21 | * @return 22 | */ 23 | public 
static Connection getConnect(String filePath) { 24 | Connection connection = null; 25 | try { 26 | URL url = CalciteUtil.class.getResource(filePath); 27 | String str = URLDecoder.decode(url.toString(), "UTF-8"); 28 | Properties info = new Properties(); 29 | info.put("model", str.replace("file:", "")); 30 | connection = DriverManager.getConnection("jdbc:calcite:", info); 31 | // connection.unwrap(CalciteConnection.class); 32 | } catch (Exception e) { 33 | e.printStackTrace(); 34 | } 35 | return connection; 36 | } 37 | 38 | /** 39 | * 归集查询后的数据并注入到 List 40 | * @param resultSet 41 | * @return 42 | * @throws Exception 43 | */ 44 | public static List> getData(ResultSet resultSet) throws Exception { 45 | List> list = Lists.newArrayList(); 46 | ResultSetMetaData metaData = resultSet.getMetaData(); 47 | int columnSize = metaData.getColumnCount(); 48 | 49 | while (resultSet.next()) { 50 | 51 | Map map = Maps.newLinkedHashMap(); 52 | for (int i = 1; i < columnSize + 1; i++) { 53 | map.put(metaData.getColumnLabel(i), resultSet.getObject(i)); 54 | } 55 | list.add(map); 56 | } 57 | return list; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/resources/TEST01.csv: -------------------------------------------------------------------------------- 1 | ID:VARCHAR,NAME1:VARCHAR,NAME2:VARCHAR 2 | 0,first,second 3 | 1,hello,world 4 | 3,hello,shezm 5 | 4,hello,other 6 | 5,hi,world 7 | 6,Harry,Potter 8 | -------------------------------------------------------------------------------- /src/main/resources/TEST02.csv: -------------------------------------------------------------------------------- 1 | ID1:VARCHAR,NAME3:VARCHAR,NAME4:VARCHAR 2 | 0,first,second 3 | 1,hello,world 4 | 5,hi,world 5 | 6,Harry,Potter 6 | 7,Harry,Pony 7 | -------------------------------------------------------------------------------- /src/main/resources/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0", 3 | "defaultSchema": "TEST_CSV", 4 | "schemas": [ 5 | { 6 | "name": "TEST_CSV", 7 | "type": "custom", 8 | "factory": "pers.shezm.calcite.csv.CsvSchemaFactory", 9 | "operand": { 10 | "dataFile": "TEST01.csv,TEST02.csv" 11 | } 12 | } 13 | ] 14 | } -------------------------------------------------------------------------------- /src/test/java/org/example/AppTest.java: -------------------------------------------------------------------------------- 1 | package org.example; 2 | 3 | import static org.junit.Assert.assertTrue; 4 | 5 | import org.junit.Test; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class AppTest 11 | { 12 | /** 13 | * Rigorous Test :-) 14 | */ 15 | @Test 16 | public void shouldAnswerWithTrue() 17 | { 18 | assertTrue( true ); 19 | } 20 | } 21 | --------------------------------------------------------------------------------
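A minimal, self-contained usage sketch of the CSV adapter (not a file in this repository; the class name is hypothetical, and it assumes model.json has been copied from src/main/resources to ./model.json, whereas CalciteUtil loads it from the classpath). It follows the same flow as Test1 and CalciteUtil.getConnect: hand the model to the Calcite JDBC driver and query the TEST_CSV schema.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;

public class CsvQuerySketch {               // hypothetical class name, not part of the repo
    public static void main(String[] args) throws Exception {
        Properties info = new Properties();
        info.put("model", "model.json");    // assumption: model.json sits in the working directory
        try (Connection connection = DriverManager.getConnection("jdbc:calcite:", info);
             Statement statement = connection.createStatement();
             ResultSet rs = statement.executeQuery(
                     "select ID, NAME1, NAME2 from TEST_CSV.TEST01 where NAME1 = 'hello'")) {
            while (rs.next()) {
                // column names come from the CSV header: ID:VARCHAR,NAME1:VARCHAR,NAME2:VARCHAR
                System.out.println(rs.getString("ID") + "," + rs.getString("NAME1") + "," + rs.getString("NAME2"));
            }
        }
    }
}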