├── .idea └── icon.png ├── examples ├── isthmus-api │ ├── .gitignore │ ├── build.gradle.kts │ └── src │ │ └── main │ │ └── java │ │ └── io │ │ └── substrait │ │ └── examples │ │ ├── SchemaHelper.java │ │ └── IsthmusAppExamples.java └── substrait-spark │ ├── .gitignore │ ├── docker-compose.yaml │ ├── src │ └── main │ │ ├── java │ │ └── io │ │ │ └── substrait │ │ │ └── examples │ │ │ ├── App.java │ │ │ ├── SparkHelper.java │ │ │ └── util │ │ │ ├── FunctionArgStringify.java │ │ │ └── ParentStringify.java │ │ └── resources │ │ └── tests_subset_2023.csv │ └── build.gradle.kts ├── spark ├── .gitignore └── src │ ├── test │ ├── resources │ │ ├── dataset-a.orc │ │ ├── dataset-a.parquet │ │ ├── dataset-a.txt │ │ ├── csv │ │ │ ├── dataset-a.csv │ │ │ └── dataset-b.csv │ │ └── dataset-a.csv │ ├── scala │ │ └── io │ │ │ └── substrait │ │ │ └── spark │ │ │ ├── DateTimeSuite.scala │ │ │ └── expression │ │ │ └── PredicateSuite.scala │ └── spark-3.2 │ │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── DatasetUtil.scala │ └── main │ ├── scala │ ├── io │ │ └── substrait │ │ │ └── spark │ │ │ ├── FileHolder.scala │ │ │ ├── HasOutputStack.scala │ │ │ ├── DefaultRelVisitor.scala │ │ │ └── expression │ │ │ └── Enum.scala │ └── org │ │ └── apache │ │ └── spark │ │ └── substrait │ │ └── SparkTypeUtil.scala │ └── resources │ └── spark.yml ├── gradle ├── gradle-daemon-jvm.properties └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── .gitmodules ├── ci └── release │ ├── img │ ├── repositories.png │ ├── actions_secrets.png │ ├── release_process.png │ └── automate_release_is_failing.png │ ├── verify.sh │ ├── publish.sh │ ├── prepare.sh │ ├── run.sh │ ├── dry_run.sh │ └── sanity.sh ├── core └── src │ ├── main │ └── java │ │ └── io │ │ └── substrait │ │ ├── util │ │ ├── VisitationContext.java │ │ ├── EmptyVisitationContext.java │ │ └── Util.java │ │ ├── function │ │ ├── NullableType.java │ │ └── ExtendedTypeCreator.java │ │ ├── package-info.java │ │ ├── relation │ │ 
├── ToProto.java │ │ ├── ZeroInputRel.java │ │ ├── SingleInputRel.java │ │ ├── BiRel.java │ │ ├── HasExtension.java │ │ ├── EmptyScan.java │ │ ├── NamedDdl.java │ │ ├── AbstractRel.java │ │ ├── NamedScan.java │ │ ├── NamedWrite.java │ │ ├── ExtensionWrite.java │ │ ├── NamedUpdate.java │ │ ├── physical │ │ │ ├── BroadcastExchange.java │ │ │ ├── RoundRobinExchange.java │ │ │ ├── TargetType.java │ │ │ ├── SingleBucketExchange.java │ │ │ ├── ScatterExchange.java │ │ │ ├── MultiBucketExchange.java │ │ │ └── AbstractExchangeRel.java │ │ ├── ExtensionDdl.java │ │ ├── LocalFiles.java │ │ ├── AbstractReadRel.java │ │ ├── ExtensionLeaf.java │ │ ├── Filter.java │ │ ├── Fetch.java │ │ ├── ExtensionTable.java │ │ ├── Sort.java │ │ ├── ExtensionSingle.java │ │ ├── Cross.java │ │ ├── Project.java │ │ ├── ExtensionMulti.java │ │ ├── Extension.java │ │ └── Rel.java │ │ ├── type │ │ ├── parser │ │ │ └── ThrowVisitor.java │ │ └── TypeExpressionEvaluator.java │ │ ├── expression │ │ ├── FunctionOption.java │ │ ├── AbstractFunctionInvocation.java │ │ ├── AggregateFunctionInvocation.java │ │ └── EnumArg.java │ │ ├── extension │ │ └── ExtensionLookup.java │ │ └── extendedexpression │ │ └── ExtendedExpression.java │ └── test │ ├── java │ └── io │ │ └── substrait │ │ ├── utils │ │ ├── StringHolderHandlingExtensionProtoConverter.java │ │ ├── StringHolderHandlingRelProtoConverter.java │ │ └── StringHolderHandlingProtoExtensionConverter.java │ │ ├── relation │ │ └── SpecVersionTest.java │ │ ├── type │ │ └── proto │ │ │ └── LiteralRoundtripTest.java │ │ ├── TestBase.java │ │ └── extension │ │ └── ExtensionCollectorUriUrnTest.java │ └── resources │ └── extensions │ └── custom_extensions.yaml ├── isthmus-cli └── src │ ├── main │ └── java │ │ └── io │ │ └── substrait │ │ └── isthmus │ │ └── cli │ │ └── InitializeAtBuildTime.java │ └── test │ ├── script │ ├── tpch_smoke.sh │ └── smoke.sh │ └── java │ └── io │ └── substrait │ └── isthmus │ └── cli │ └── IsthmusEntryPointTest.java ├── .gitignore ├── 
.yamllint.yaml ├── isthmus ├── src │ ├── test │ │ ├── resources │ │ │ ├── subquery │ │ │ │ ├── nested_scalar_subquery_in_select.sql │ │ │ │ └── nested_scalar_subquery_in_filter.sql │ │ │ ├── tpch │ │ │ │ ├── queries │ │ │ │ │ ├── 06.sql │ │ │ │ │ ├── 17.sql │ │ │ │ │ ├── 14.sql │ │ │ │ │ ├── 13.sql │ │ │ │ │ ├── 04.sql │ │ │ │ │ ├── 03.sql │ │ │ │ │ ├── 01.sql │ │ │ │ │ ├── 18.sql │ │ │ │ │ ├── 05.sql │ │ │ │ │ ├── 16.sql │ │ │ │ │ ├── 11.sql │ │ │ │ │ ├── 15.sql │ │ │ │ │ ├── 10.sql │ │ │ │ │ ├── 12.sql │ │ │ │ │ ├── 09.sql │ │ │ │ │ ├── 20.sql │ │ │ │ │ ├── 21.sql │ │ │ │ │ ├── 22.sql │ │ │ │ │ ├── 02.sql │ │ │ │ │ ├── 08.sql │ │ │ │ │ ├── 07.sql │ │ │ │ │ └── 19.sql │ │ │ │ └── schema_error.sql │ │ │ ├── tpcds │ │ │ │ └── queries │ │ │ │ │ ├── 55.sql │ │ │ │ │ ├── 96.sql │ │ │ │ │ ├── 52.sql │ │ │ │ │ ├── 03.sql │ │ │ │ │ ├── 42.sql │ │ │ │ │ ├── 82.sql │ │ │ │ │ ├── 37.sql │ │ │ │ │ ├── 15.sql │ │ │ │ │ ├── 84.sql │ │ │ │ │ ├── 07.sql │ │ │ │ │ ├── 22.sql │ │ │ │ │ ├── 01.sql │ │ │ │ │ ├── 86.sql │ │ │ │ │ ├── 26.sql │ │ │ │ │ ├── 06.sql │ │ │ │ │ ├── 45.sql │ │ │ │ │ ├── 19.sql │ │ │ │ │ ├── 32.sql │ │ │ │ │ ├── 27.sql │ │ │ │ │ ├── 92.sql │ │ │ │ │ ├── 93.sql │ │ │ │ │ ├── 12.sql │ │ │ │ │ ├── 20.sql │ │ │ │ │ ├── 98.sql │ │ │ │ │ ├── 36.sql │ │ │ │ │ ├── 65.sql │ │ │ │ │ ├── 94.sql │ │ │ │ │ ├── 38.sql │ │ │ │ │ ├── 87.sql │ │ │ │ │ ├── 79.sql │ │ │ │ │ ├── 40.sql │ │ │ │ │ ├── 97.sql │ │ │ │ │ ├── 90.sql │ │ │ │ │ ├── 43.sql │ │ │ │ │ ├── 21.sql │ │ │ │ │ ├── 53.sql │ │ │ │ │ ├── 91.sql │ │ │ │ │ ├── 95.sql │ │ │ │ │ ├── 86a.sql │ │ │ │ │ ├── 25.sql │ │ │ │ │ ├── 76.sql │ │ │ │ │ ├── 16.sql │ │ │ │ │ ├── 24.sql │ │ │ │ │ ├── 70.sql │ │ │ │ │ ├── 62.sql │ │ │ │ │ ├── 39.sql │ │ │ │ │ ├── 99.sql │ │ │ │ │ ├── 30.sql │ │ │ │ │ ├── 67.sql │ │ │ │ │ ├── 72.sql │ │ │ │ │ ├── 18.sql │ │ │ │ │ ├── 63.sql │ │ │ │ │ ├── 73.sql │ │ │ │ │ ├── 27a.sql │ │ │ │ │ ├── 81.sql │ │ │ │ │ ├── 29.sql │ │ │ │ │ ├── 61.sql │ │ │ │ │ ├── 89.sql │ │ │ │ │ ├── 36a.sql │ │ │ │ │ ├── 
46.sql │ │ │ │ │ ├── 48.sql │ │ │ │ │ ├── 69.sql │ │ │ │ │ └── 68.sql │ │ │ └── extensions │ │ │ │ ├── functions_duplicate_urn1.yaml │ │ │ │ ├── substrait.proto │ │ │ │ ├── functions_duplicate_urn2.yaml │ │ │ │ └── scalar_functions_custom.yaml │ │ └── java │ │ │ └── io │ │ │ └── substrait │ │ │ └── isthmus │ │ │ ├── ProjectTest.java │ │ │ ├── KeyConstraintsTest.java │ │ │ ├── FetchTest.java │ │ │ ├── DdlRoundtripTest.java │ │ │ ├── LogarithmicFunctionTest.java │ │ │ ├── RoundingFunctionTest.java │ │ │ ├── DmlRoundtripTest.java │ │ │ ├── EmptyArrayLiteralTest.java │ │ │ ├── expression │ │ │ └── AggregateFunctionConverterTest.java │ │ │ ├── TpchQueryTest.java │ │ │ └── CalciteObjs.java │ └── main │ │ └── java │ │ └── io │ │ └── substrait │ │ └── isthmus │ │ ├── expression │ │ ├── ExtractIndexing.java │ │ ├── SubstraitFunctionMapping.java │ │ └── ScalarFunctionMapper.java │ │ ├── CallConverter.java │ │ ├── SubstraitToSql.java │ │ ├── UserTypeMapper.java │ │ ├── calcite │ │ ├── SubstraitTable.java │ │ ├── SubstraitSchema.java │ │ └── rel │ │ │ ├── CreateView.java │ │ │ └── CreateTable.java │ │ ├── sql │ │ ├── SubstraitSqlValidator.java │ │ └── SubstraitSqlDialect.java │ │ └── FeatureBoard.java └── README.md ├── settings.gradle.kts ├── .pre-commit-config.yaml ├── .github ├── dependabot.yml └── workflows │ └── sanity.yml ├── .editorconfig ├── Makefile ├── gradle.properties └── bom └── build.gradle.kts /.idea/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/substrait-java/HEAD/.idea/icon.png -------------------------------------------------------------------------------- /examples/isthmus-api/.gitignore: -------------------------------------------------------------------------------- 1 | _apps 2 | _data 3 | **/*/bin 4 | build 5 | substrait.plan 6 | -------------------------------------------------------------------------------- /examples/substrait-spark/.gitignore: 
-------------------------------------------------------------------------------- 1 | spark-warehouse 2 | derby.log 3 | _apps 4 | _data 5 | bin 6 | -------------------------------------------------------------------------------- /spark/.gitignore: -------------------------------------------------------------------------------- 1 | metastore_db 2 | spark-warehouse 3 | /src/test/resources/write-a.csv 4 | derby.log 5 | -------------------------------------------------------------------------------- /gradle/gradle-daemon-jvm.properties: -------------------------------------------------------------------------------- 1 | #This file is generated by updateDaemonJvm 2 | toolchainVersion=17 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "spec"] 2 | path = substrait 3 | url = https://github.com/substrait-io/substrait.git 4 | -------------------------------------------------------------------------------- /ci/release/img/repositories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/substrait-java/HEAD/ci/release/img/repositories.png -------------------------------------------------------------------------------- /ci/release/img/actions_secrets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/substrait-java/HEAD/ci/release/img/actions_secrets.png -------------------------------------------------------------------------------- /ci/release/img/release_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/substrait-java/HEAD/ci/release/img/release_process.png -------------------------------------------------------------------------------- /ci/release/verify.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | 4 | set -euo pipefail 5 | 6 | ls -latr native/libs 7 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/substrait-java/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/util/VisitationContext.java: -------------------------------------------------------------------------------- 1 | package io.substrait.util; 2 | 3 | public interface VisitationContext {} 4 | -------------------------------------------------------------------------------- /spark/src/test/resources/dataset-a.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/substrait-java/HEAD/spark/src/test/resources/dataset-a.orc -------------------------------------------------------------------------------- /spark/src/test/resources/dataset-a.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/substrait-java/HEAD/spark/src/test/resources/dataset-a.parquet -------------------------------------------------------------------------------- /ci/release/img/automate_release_is_failing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/substrait-io/substrait-java/HEAD/ci/release/img/automate_release_is_failing.png -------------------------------------------------------------------------------- /isthmus-cli/src/main/java/io/substrait/isthmus/cli/InitializeAtBuildTime.java: -------------------------------------------------------------------------------- 1 | 
package io.substrait.isthmus.cli; 2 | 3 | public class InitializeAtBuildTime {} 4 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/function/NullableType.java: -------------------------------------------------------------------------------- 1 | package io.substrait.function; 2 | 3 | public interface NullableType { 4 | boolean nullable(); 5 | } 6 | -------------------------------------------------------------------------------- /spark/src/test/resources/dataset-a.txt: -------------------------------------------------------------------------------- 1 | 1|one 2 | 2|two 3 | 3|three 4 | 4|'fo|ur' 5 | 5|five 6 | 6|'six' 7 | 7|seven 8 | 8|eight 9 | 9|nine 10 | 10|ten 11 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/package-info.java: -------------------------------------------------------------------------------- 1 | @org.immutables.value.Value.Style(allowedClasspathAnnotations = {java.lang.Override.class}) 2 | package io.substrait; 3 | -------------------------------------------------------------------------------- /spark/src/test/resources/csv/dataset-a.csv: -------------------------------------------------------------------------------- 1 | ID,VALUE 2 | 1,one 3 | 2,two 4 | 3,three 5 | 4,four 6 | 5,five 7 | 6,six 8 | 7,seven 9 | 8,eight 10 | 9,nine 11 | 10,ten 12 | -------------------------------------------------------------------------------- /spark/src/test/resources/dataset-a.csv: -------------------------------------------------------------------------------- 1 | ID,VALUE 2 | 1,one 3 | 2,"two" 4 | 3,"three" 5 | 4,"fo,ur" 6 | 5,five 7 | 6,six 8 | 7,seven 9 | 8,eight 10 | 9,nine 11 | 10,ten 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/target 2 | **/.gradle 3 | **/.idea 4 | **/build 5 | gen 6 | 
**/.DS_Store 7 | *.iml 8 | out/** 9 | *.iws 10 | .vscode 11 | .pmdCache 12 | 13 | */bin 14 | .metals 15 | .bloop 16 | -------------------------------------------------------------------------------- /spark/src/test/resources/csv/dataset-b.csv: -------------------------------------------------------------------------------- 1 | ID,VALUE 2 | 11,eleven 3 | 12,twelve 4 | 13,thirteen 5 | 14,fourteen 6 | 15,fifteen 7 | 16,sixteen 8 | 17,seventeen 9 | 18,eighteen 10 | 19,nineteen 11 | 20,twenty 12 | -------------------------------------------------------------------------------- /.yamllint.yaml: -------------------------------------------------------------------------------- 1 | rules: 2 | line-length: 3 | max: 120 4 | brackets: 5 | forbid: false 6 | min-spaces-inside: 0 7 | max-spaces-inside: 1 8 | min-spaces-inside-empty: 0 9 | max-spaces-inside-empty: 0 10 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/ToProto.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import com.google.protobuf.Message; 4 | 5 | public interface ToProto<T extends Message> { 6 | T toProto(RelProtoConverter converter); 7 | } 8 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/subquery/nested_scalar_subquery_in_select.sql: -------------------------------------------------------------------------------- 1 | SELECT p_partkey, (SELECT sum(l_orderkey) 2 | FROM lineitem l 3 | WHERE l.l_partkey = p.p_partkey 4 | ) 5 | FROM part p -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/util/EmptyVisitationContext.java: -------------------------------------------------------------------------------- 1 | package io.substrait.util; 2 | 3 | public class EmptyVisitationContext implements VisitationContext { 4 | public static final EmptyVisitationContext 
INSTANCE = new EmptyVisitationContext(); 5 | } 6 | -------------------------------------------------------------------------------- /settings.gradle.kts: -------------------------------------------------------------------------------- 1 | rootProject.name = "substrait" 2 | 3 | includeBuild("build-logic") 4 | 5 | include( 6 | "bom", 7 | "core", 8 | "isthmus", 9 | "isthmus-cli", 10 | "spark", 11 | "examples:substrait-spark", 12 | "examples:isthmus-api", 13 | ) 14 | -------------------------------------------------------------------------------- /ci/release/publish.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | 4 | set -euo pipefail 5 | 6 | # ensure the submodule tags exist 7 | git submodule foreach 'git fetch --unshallow || true' 8 | 9 | ./gradlew clean 10 | ./gradlew publishAggregationToCentralPortal 11 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-9.1.0-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /ci/release/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | 4 | set -euo pipefail 5 | 6 | # update gradle.properties version number ($1 is the new version) 7 | LAST=`sed -rn 's/^version = (.*)$/\1/p' gradle.properties` 8 | echo "Upgrade version from $LAST to $1" 9 | # keep -i and -r as separate flags: GNU sed reads "-ir" as -i with backup suffix "r" (leaves gradle.propertiesr behind) 10 | sed -i -r "s/^version = .*/version = $1/" gradle.properties 11 | -------------------------------------------------------------------------------- 
/isthmus/src/test/resources/tpch/queries/06.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l_extendedprice * l_discount) as revenue 3 | from 4 | "lineitem" 5 | where 6 | l_shipdate >= date '1997-01-01' 7 | and l_shipdate < date '1997-01-01' + interval '1' year 8 | and 9 | l_discount between 0.03 - 0.01 and 0.03 + 0.01 10 | and l_quantity < 24 11 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/adrienverge/yamllint.git 3 | rev: v1.33.0 4 | hooks: 5 | - id: yamllint 6 | args: [-c=.yamllint.yaml] 7 | - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook 8 | rev: v9.9.0 9 | hooks: 10 | - id: commitlint 11 | stages: [commit-msg] 12 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/ZeroInputRel.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import java.util.Collections; 4 | import java.util.List; 5 | 6 | public abstract class ZeroInputRel extends AbstractRel { 7 | 8 | @Override 9 | public final List getInputs() { 10 | return Collections.emptyList(); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/expression/ExtractIndexing.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.expression; 2 | 3 | /** 4 | * Enum to define the INDEXING property on the date functions. 5 | * 6 | * <p>Controls if the number used for example in months is 0 or 1 based. 7 | */ 8 | public enum ExtractIndexing { 9 | ONE, 10 | ZERO 11 | } 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gradle" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | commit-message: 8 | prefix: "build(deps): " 9 | - package-ecosystem: "github-actions" 10 | directory: "/" 11 | schedule: 12 | interval: "weekly" 13 | commit-message: 14 | prefix: "build(deps): " 15 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/55.sql: -------------------------------------------------------------------------------- 1 | select i_brand_id brand_id, i_brand brand, 2 | sum(ss_ext_sales_price) ext_price 3 | from date_dim, store_sales, item 4 | where d_date_sk = ss_sold_date_sk 5 | and ss_item_sk = i_item_sk 6 | and i_manager_id=30 7 | and d_moy=12 8 | and d_year=1998 9 | group by i_brand, i_brand_id 10 | order by ext_price desc, i_brand_id 11 | LIMIT 100 12 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/SingleInputRel.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import java.util.Collections; 4 | import java.util.List; 5 | 6 | public abstract class SingleInputRel extends AbstractRel { 7 | 8 | public abstract Rel getInput(); 9 | 10 | @Override 11 | public final List getInputs() { 12 | return Collections.singletonList(getInput()); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/type/parser/ThrowVisitor.java: -------------------------------------------------------------------------------- 1 | 
package io.substrait.type.parser; 2 | 3 | import io.substrait.type.SubstraitTypeBaseVisitor; 4 | import org.antlr.v4.runtime.tree.RuleNode; 5 | 6 | class ThrowVisitor<T> extends SubstraitTypeBaseVisitor<T> { 7 | 8 | @Override 9 | public T visitChildren(final RuleNode node) { 10 | throw new UnsupportedOperationException(); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/BiRel.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import java.util.Arrays; 4 | import java.util.List; 5 | 6 | public abstract class BiRel extends AbstractRel { 7 | 8 | public abstract Rel getLeft(); 9 | 10 | public abstract Rel getRight(); 11 | 12 | @Override 13 | public final List getInputs() { 14 | return Arrays.asList(getLeft(), getRight()); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/17.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l.l_extendedprice) / 7.0 as avg_yearly 3 | from 4 | "lineitem" l, 5 | "part" p 6 | where 7 | p.p_partkey = l.l_partkey 8 | and p.p_brand = 'Brand#13' 9 | and p.p_container = 'JUMBO CAN' 10 | and l.l_quantity < ( 11 | select 12 | 0.2 * avg(l2.l_quantity) 13 | from 14 | "lineitem" l2 15 | where 16 | l2.l_partkey = p.p_partkey 17 | ) 18 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/function/ExtendedTypeCreator.java: -------------------------------------------------------------------------------- 1 | package io.substrait.function; 2 | 3 | public interface ExtendedTypeCreator<T, I> { 4 | T fixedCharE(I len); 5 | 6 | T varCharE(I len); 7 | 8 | T fixedBinaryE(I len); 9 | 10 | T decimalE(I precision, I scale); 11 | 12 | T structE(T... 
types); 13 | 14 | T structE(Iterable types); 15 | 16 | T listE(T type); 17 | 18 | T mapE(T key, T value); 19 | } 20 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/extensions/functions_duplicate_urn1.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:io.substrait:functions_string 4 | 5 | scalar_functions: 6 | - name: "concat" 7 | description: "concatenate strings" 8 | impls: 9 | - args: 10 | - name: str1 11 | value: string 12 | - name: str2 13 | value: string 14 | variadic: 15 | min: 0 16 | return: string 17 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/extensions/substrait.proto: -------------------------------------------------------------------------------- 1 | syntax="proto3"; 2 | 3 | package isthmus; 4 | 5 | import "substrait/algebra.proto"; 6 | import "substrait/type.proto"; 7 | 8 | option java_package = "io.substrait.isthmus.extensions.test.protobuf"; 9 | option java_multiple_files = true; 10 | 11 | // Append the literal to the output rows of the relation 12 | message ColumnAppendDetail { 13 | substrait.Expression.Literal literal = 1; 14 | } 15 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/subquery/nested_scalar_subquery_in_filter.sql: -------------------------------------------------------------------------------- 1 | SELECT p_partkey, p_size 2 | FROM part p 3 | WHERE p_size < 4 | (SELECT sum(l_orderkey) 5 | FROM lineitem l 6 | WHERE l.l_partkey = p.p_partkey 7 | AND l_linenumber > 8 | (SELECT count(*) cnt 9 | FROM partsupp ps 10 | WHERE ps.ps_partkey = p.p_partkey 11 | AND PS.ps_suppkey = l.l_suppkey)) 12 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/14.sql: 
-------------------------------------------------------------------------------- 1 | select 2 | 100.00 * sum(case 3 | when p.p_type like 'PROMO%' 4 | then l.l_extendedprice * (1 - l.l_discount) 5 | else 0 6 | end) / sum(l.l_extendedprice * (1 - l.l_discount)) as promo_revenue 7 | from 8 | "lineitem" l, 9 | "part" p 10 | where 11 | l.l_partkey = p.p_partkey 12 | and l.l_shipdate >= date '1994-08-01' 13 | and l.l_shipdate < date '1994-08-01' + interval '1' month 14 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/HasExtension.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.extension.AdvancedExtension; 4 | import java.util.Optional; 5 | 6 | /** Used to indicate the potential presence of an {@link AdvancedExtension} */ 7 | public interface HasExtension { 8 | /** 9 | * @return the {@link AdvancedExtension} associated directly with the class 10 | */ 11 | Optional getExtension(); 12 | } 13 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/expression/FunctionOption.java: -------------------------------------------------------------------------------- 1 | package io.substrait.expression; 2 | 3 | import java.util.List; 4 | import org.immutables.value.Value; 5 | 6 | @Value.Immutable 7 | public abstract class FunctionOption { 8 | 9 | public abstract String getName(); 10 | 11 | public abstract List values(); 12 | 13 | public static ImmutableFunctionOption.Builder builder() { 14 | return ImmutableFunctionOption.builder(); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/96.sql: -------------------------------------------------------------------------------- 1 | select count(*) 2 | from store_sales 3 | ,household_demographics 4 | ,time_dim, store 5 
| where ss_sold_time_sk = time_dim.t_time_sk 6 | and ss_hdemo_sk = household_demographics.hd_demo_sk 7 | and ss_store_sk = s_store_sk 8 | and time_dim.t_hour = 20 9 | and time_dim.t_minute >= 30 10 | and household_demographics.hd_dep_count = 7 11 | and store.s_store_name = 'ese' 12 | order by count(*) 13 | limit 100 -------------------------------------------------------------------------------- /core/src/test/java/io/substrait/utils/StringHolderHandlingExtensionProtoConverter.java: -------------------------------------------------------------------------------- 1 | package io.substrait.utils; 2 | 3 | import com.google.protobuf.Any; 4 | import io.substrait.extension.ExtensionProtoConverter; 5 | 6 | public class StringHolderHandlingExtensionProtoConverter 7 | extends ExtensionProtoConverter { 8 | @Override 9 | protected Any toProto(final StringHolder holder) { 10 | return holder.toProto(null); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/CallConverter.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import io.substrait.expression.Expression; 4 | import java.util.Optional; 5 | import java.util.function.Function; 6 | import org.apache.calcite.rex.RexCall; 7 | import org.apache.calcite.rex.RexNode; 8 | 9 | @FunctionalInterface 10 | public interface CallConverter { 11 | Optional convert(RexCall call, Function topLevelConverter); 12 | } 13 | -------------------------------------------------------------------------------- /ci/release/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | 4 | set -euo pipefail 5 | 6 | npx --yes \ 7 | -p semantic-release \ 8 | -p "@semantic-release/commit-analyzer" \ 9 | -p "@semantic-release/release-notes-generator" \ 10 | -p "@semantic-release/changelog" \ 11 | -p 
"@semantic-release/github" \ 12 | -p "@semantic-release/exec" \ 13 | -p "@semantic-release/git" \ 14 | -p "conventional-changelog-conventionalcommits" \ 15 | semantic-release --ci 16 | -------------------------------------------------------------------------------- /core/src/test/java/io/substrait/utils/StringHolderHandlingRelProtoConverter.java: -------------------------------------------------------------------------------- 1 | package io.substrait.utils; 2 | 3 | import io.substrait.extension.ExtensionCollector; 4 | import io.substrait.relation.RelProtoConverter; 5 | 6 | public class StringHolderHandlingRelProtoConverter extends RelProtoConverter { 7 | 8 | public StringHolderHandlingRelProtoConverter(final ExtensionCollector extensionCollector) { 9 | super(extensionCollector, new StringHolderHandlingExtensionProtoConverter()); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/13.sql: -------------------------------------------------------------------------------- 1 | select 2 | c_count, 3 | count(*) as custdist 4 | from 5 | ( 6 | select 7 | c.c_custkey, 8 | count(o.o_orderkey) 9 | from 10 | "customer" c 11 | left outer join "orders" o 12 | on c.c_custkey = o.o_custkey 13 | and o.o_comment not like '%special%requests%' 14 | group by 15 | c.c_custkey 16 | ) as orders (c_custkey, c_count) 17 | group by 18 | c_count 19 | order by 20 | custdist desc, 21 | c_count desc 22 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/04.sql: -------------------------------------------------------------------------------- 1 | select 2 | o.o_orderpriority, 3 | count(*) as order_count 4 | from 5 | "orders" o 6 | 7 | where 8 | o.o_orderdate >= date '1996-10-01' 9 | and o.o_orderdate < date '1996-10-01' + interval '3' month 10 | and 11 | exists ( 12 | select 13 | * 14 | from 15 | "lineitem" l 16 | where 17 | l.l_orderkey = 
o.o_orderkey 18 | and l.l_commitdate < l.l_receiptdate 19 | ) 20 | group by 21 | o.o_orderpriority 22 | order by 23 | o.o_orderpriority 24 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/52.sql: -------------------------------------------------------------------------------- 1 | select dt.d_year 2 | ,item.i_brand_id brand_id 3 | ,item.i_brand brand 4 | ,sum(ss_ext_sales_price) ext_price 5 | from date_dim dt 6 | ,store_sales 7 | ,item 8 | where dt.d_date_sk = store_sales.ss_sold_date_sk 9 | and store_sales.ss_item_sk = item.i_item_sk 10 | and item.i_manager_id = 1 11 | and dt.d_moy=12 12 | and dt.d_year=2001 13 | group by dt.d_year 14 | ,item.i_brand 15 | ,item.i_brand_id 16 | order by dt.d_year 17 | ,ext_price desc 18 | ,brand_id 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/03.sql: -------------------------------------------------------------------------------- 1 | select dt.d_year 2 | ,item.i_brand_id brand_id 3 | ,item.i_brand brand 4 | ,sum(ss_sales_price) sum_agg 5 | from date_dim dt 6 | ,store_sales 7 | ,item 8 | where dt.d_date_sk = store_sales.ss_sold_date_sk 9 | and store_sales.ss_item_sk = item.i_item_sk 10 | and item.i_manufact_id = 30 11 | and dt.d_moy=12 12 | group by dt.d_year 13 | ,item.i_brand 14 | ,item.i_brand_id 15 | order by dt.d_year 16 | ,sum_agg desc 17 | ,brand_id 18 | LIMIT 100 19 | -------------------------------------------------------------------------------- /isthmus/README.md: -------------------------------------------------------------------------------- 1 | # Isthmus 2 | 3 | ## Overview 4 | 5 | Substrait Isthmus is a Java library which enables serializing SQL queries to [Substrait Protobuf](https://substrait.io/serialization/binary_serialization/) and SQL expressions to [Extended Expressions](https://substrait.io/expressions/extended_expression/) using 6 | the 
Calcite SQL compiler. Optionally, you can leverage the Calcite RelNode to Substrait Plan translator as an IR translation. 7 | 8 | The capability provided by this library can be accessed using a command-line interface, provided by [isthmus-cli](../isthmus-cli). 9 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | insert_final_newline = true 7 | indent_style = space 8 | trim_trailing_whitespace = true 9 | 10 | [*.{yaml,yml}] 11 | indent_size = 2 12 | 13 | [{**/*.sql,**/OuterReferenceResolver.md,**gradlew.bat,**/*.parquet,**/*.orc}] 14 | charset = unset 15 | end_of_line = unset 16 | insert_final_newline = unset 17 | indent_style = unset 18 | trim_trailing_whitespace = unset 19 | 20 | [*.gradle.kts] 21 | indent_size = 2 22 | 23 | [.gitmodules] 24 | indent_style = tab 25 | 26 | [Makefile] 27 | indent_style = tab 28 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/42.sql: -------------------------------------------------------------------------------- 1 | select dt.d_year 2 | ,item.i_category_id 3 | ,item.i_category 4 | ,sum(ss_ext_sales_price) 5 | from date_dim dt 6 | ,store_sales 7 | ,item 8 | where dt.d_date_sk = store_sales.ss_sold_date_sk 9 | and store_sales.ss_item_sk = item.i_item_sk 10 | and item.i_manager_id = 1 11 | and dt.d_moy=12 12 | and dt.d_year=2001 13 | group by dt.d_year 14 | ,item.i_category_id 15 | ,item.i_category 16 | order by sum(ss_ext_sales_price) desc,dt.d_year 17 | ,item.i_category_id 18 | ,item.i_category 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/82.sql: -------------------------------------------------------------------------------- 1 | select i_item_id 2 | ,i_item_desc 
3 | ,i_current_price 4 | from item, inventory, date_dim, store_sales 5 | where i_current_price between 44 and 44+30 6 | and inv_item_sk = i_item_sk 7 | and d_date_sk=inv_date_sk 8 | and d_date between cast('2000-05-25' as date) and (cast('2000-05-25' as date) + interval '60' day) 9 | and i_manufact_id in (129,270,821,423) 10 | and inv_quantity_on_hand between 100 and 500 11 | and ss_item_sk = i_item_sk 12 | group by i_item_id,i_item_desc,i_current_price 13 | order by i_item_id 14 | LIMIT 100 15 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/EmptyScan.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import org.immutables.value.Value; 5 | 6 | @Value.Immutable 7 | public abstract class EmptyScan extends AbstractReadRel { 8 | 9 | @Override 10 | public O accept( 11 | RelVisitor visitor, C context) throws E { 12 | return visitor.visit(this, context); 13 | } 14 | 15 | public static ImmutableEmptyScan.Builder builder() { 16 | return ImmutableEmptyScan.builder(); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/37.sql: -------------------------------------------------------------------------------- 1 | select i_item_id 2 | ,i_item_desc 3 | ,i_current_price 4 | from item, inventory, date_dim, catalog_sales 5 | where i_current_price between 29 and 29 + 30 6 | and inv_item_sk = i_item_sk 7 | and d_date_sk=inv_date_sk 8 | and d_date between cast('2000-02-01'as date) and (cast('2000-02-01' as date) + interval '60' day) 9 | and i_manufact_id in (677,940,694,808) 10 | and inv_quantity_on_hand between 100 and 500 11 | and cs_item_sk = i_item_sk 12 | group by i_item_id,i_item_desc,i_current_price 13 | order by i_item_id 14 | LIMIT 100 15 | 
-------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/03.sql: -------------------------------------------------------------------------------- 1 | select 2 | l.l_orderkey, 3 | sum(l.l_extendedprice * (1 - l.l_discount)) as revenue, 4 | o.o_orderdate, 5 | o.o_shippriority 6 | 7 | from 8 | "customer" c, 9 | "orders" o, 10 | "lineitem" l 11 | 12 | where 13 | c.c_mktsegment = 'HOUSEHOLD' 14 | and c.c_custkey = o.o_custkey 15 | and l.l_orderkey = o.o_orderkey 16 | and o.o_orderdate < date '1995-03-25' 17 | and l.l_shipdate > date '1995-03-25' 18 | 19 | group by 20 | l.l_orderkey, 21 | o.o_orderdate, 22 | o.o_shippriority 23 | order by 24 | revenue desc, 25 | o.o_orderdate 26 | limit 10 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | 5 | OSV_SCANNER_IMAGE := ghcr.io/google/osv-scanner:v2.0.2 6 | 7 | .PHONY: scan 8 | scan: 9 | ifdef component 10 | ./gradlew --quiet ':$(component):dependencies' --write-locks --configuration runtimeClasspath 11 | docker run --rm --volume './$(component)/gradle.lockfile:/gradle.lockfile' $(OSV_SCANNER_IMAGE) scan --lockfile /gradle.lockfile 12 | else 13 | $(MAKE) component=core scan 14 | $(MAKE) component=isthmus scan 15 | $(MAKE) component=isthmus-cli scan 16 | endif 17 | 18 | .PHONY: clean 19 | clean: 20 | find . 
-mindepth 2 -maxdepth 2 -type f -name gradle.lockfile -delete -print 21 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/type/TypeExpressionEvaluator.java: -------------------------------------------------------------------------------- 1 | package io.substrait.type; 2 | 3 | import io.substrait.extension.SimpleExtension; 4 | import io.substrait.function.TypeExpression; 5 | import java.util.List; 6 | 7 | public class TypeExpressionEvaluator { 8 | 9 | public static Type evaluateExpression( 10 | TypeExpression returnExpression, 11 | List parameterizedTypeList, 12 | List actualTypes) { 13 | 14 | if (returnExpression instanceof Type) { 15 | return (Type) returnExpression; 16 | } 17 | throw new UnsupportedOperationException("NYI"); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/15.sql: -------------------------------------------------------------------------------- 1 | select ca_zip 2 | ,sum(cs_sales_price) 3 | from catalog_sales 4 | ,customer 5 | ,customer_address 6 | ,date_dim 7 | where cs_bill_customer_sk = c_customer_sk 8 | and c_current_addr_sk = ca_address_sk 9 | and ( substring(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', 10 | '85392', '85460', '80348', '81792') 11 | or ca_state in ('CA','WA','GA') 12 | or cs_sales_price > 500) 13 | and cs_sold_date_sk = d_date_sk 14 | and d_qoy = 2 and d_year = 1998 15 | group by ca_zip 16 | order by ca_zip 17 | LIMIT 100 18 | -------------------------------------------------------------------------------- /isthmus/src/test/java/io/substrait/isthmus/ProjectTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import io.substrait.dsl.SubstraitBuilder; 4 | import io.substrait.relation.Project; 5 | import io.substrait.relation.Rel; 6 | import org.junit.jupiter.api.Test; 7 | 8 | class ProjectTest
extends PlanTestBase { 9 | final SubstraitBuilder b = new SubstraitBuilder(extensions); 10 | final Rel emptyTable = b.emptyScan(); 11 | 12 | @Test 13 | void avoidProjectRemapOnEmptyInput() { 14 | Rel projection = 15 | Project.builder().input(emptyTable).addExpressions(b.add(b.i32(1), b.i32(2))).build(); 16 | assertFullRoundTrip(projection); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/01.sql: -------------------------------------------------------------------------------- 1 | select 2 | l_returnflag, 3 | l_linestatus, 4 | sum(l_quantity) as sum_qty, 5 | sum(l_extendedprice) as sum_base_price, 6 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 7 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 8 | avg(l_quantity) as avg_qty, 9 | avg(l_extendedprice) as avg_price, 10 | avg(l_discount) as avg_disc, 11 | count(*) as count_order 12 | from 13 | lineitem 14 | where 15 | l_shipdate <= date '1998-12-01' - interval '120' day (3) 16 | group by 17 | l_returnflag, 18 | l_linestatus 19 | 20 | order by 21 | l_returnflag, 22 | l_linestatus 23 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/NamedDdl.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import java.util.List; 5 | import org.immutables.value.Value; 6 | 7 | @Value.Immutable 8 | public abstract class NamedDdl extends AbstractDdlRel { 9 | public abstract List getNames(); 10 | 11 | @Override 12 | public O accept( 13 | RelVisitor visitor, C context) throws E { 14 | return visitor.visit(this, context); 15 | } 16 | 17 | public static ImmutableNamedDdl.Builder builder() { 18 | return ImmutableNamedDdl.builder(); 19 | } 20 | } 21 | 
-------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/AbstractRel.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.type.Type; 4 | import io.substrait.util.Util; 5 | import java.util.function.Supplier; 6 | 7 | public abstract class AbstractRel implements Rel { 8 | 9 | private Supplier recordType = 10 | Util.memoize( 11 | () -> { 12 | Type.Struct s = deriveRecordType(); 13 | return getRemap().map(r -> r.remap(s)).orElse(s); 14 | }); 15 | 16 | protected abstract Type.Struct deriveRecordType(); 17 | 18 | @Override 19 | public final Type.Struct getRecordType() { 20 | return recordType.get(); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/NamedScan.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import java.util.List; 5 | import org.immutables.value.Value; 6 | 7 | @Value.Immutable 8 | public abstract class NamedScan extends AbstractReadRel { 9 | 10 | public abstract List getNames(); 11 | 12 | @Override 13 | public O accept( 14 | RelVisitor visitor, C context) throws E { 15 | return visitor.visit(this, context); 16 | } 17 | 18 | public static ImmutableNamedScan.Builder builder() { 19 | return ImmutableNamedScan.builder(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/NamedWrite.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import java.util.List; 5 | import org.immutables.value.Value; 6 | 7 | @Value.Immutable 8 | public abstract class NamedWrite extends 
AbstractWriteRel { 9 | public abstract List getNames(); 10 | 11 | @Override 12 | public O accept( 13 | RelVisitor visitor, C context) throws E { 14 | return visitor.visit(this, context); 15 | } 16 | 17 | public static ImmutableNamedWrite.Builder builder() { 18 | return ImmutableNamedWrite.builder(); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/ExtensionWrite.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import org.immutables.value.Value; 5 | 6 | @Value.Immutable 7 | public abstract class ExtensionWrite extends AbstractWriteRel { 8 | public abstract Extension.WriteExtensionObject getDetail(); 9 | 10 | @Override 11 | public O accept( 12 | RelVisitor visitor, C context) throws E { 13 | return visitor.visit(this, context); 14 | } 15 | 16 | public static ImmutableExtensionWrite.Builder builder() { 17 | return ImmutableExtensionWrite.builder(); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/NamedUpdate.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import java.util.List; 5 | import org.immutables.value.Value; 6 | 7 | @Value.Immutable 8 | public abstract class NamedUpdate extends AbstractUpdate { 9 | 10 | public abstract List getNames(); 11 | 12 | @Override 13 | public O accept( 14 | RelVisitor visitor, C context) throws E { 15 | return visitor.visit(this, context); 16 | } 17 | 18 | public static ImmutableNamedUpdate.Builder builder() { 19 | return ImmutableNamedUpdate.builder(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- 
/core/src/main/java/io/substrait/relation/physical/BroadcastExchange.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation.physical; 2 | 3 | import io.substrait.relation.RelVisitor; 4 | import io.substrait.util.VisitationContext; 5 | import org.immutables.value.Value; 6 | 7 | @Value.Immutable 8 | public abstract class BroadcastExchange extends AbstractExchangeRel { 9 | @Override 10 | public O accept( 11 | RelVisitor visitor, C context) throws E { 12 | return visitor.visit(this, context); 13 | } 14 | 15 | public static ImmutableBroadcastExchange.Builder builder() { 16 | return ImmutableBroadcastExchange.builder(); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/ExtensionDdl.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import org.immutables.value.Value; 5 | 6 | @Value.Immutable 7 | public abstract class ExtensionDdl extends AbstractDdlRel implements HasExtension { 8 | public abstract Extension.DdlExtensionObject getDetail(); 9 | 10 | @Override 11 | public O accept( 12 | RelVisitor visitor, C context) throws E { 13 | return visitor.visit(this, context); 14 | } 15 | 16 | public static ImmutableExtensionDdl.Builder builder() { 17 | return ImmutableExtensionDdl.builder(); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/84.sql: -------------------------------------------------------------------------------- 1 | select c_customer_id as customer_id 2 | ,c_last_name || ', ' || c_first_name as customername 3 | from customer 4 | ,customer_address 5 | ,customer_demographics 6 | ,household_demographics 7 | ,income_band 8 | ,store_returns 9 | where ca_city = 'dist(cities, 1, large)' 10 | and 
c_current_addr_sk = ca_address_sk 11 | and ib_lower_bound >= 38936 12 | and ib_upper_bound <= 38936 + 50000 13 | and ib_income_band_sk = hd_income_band_sk 14 | and cd_demo_sk = c_current_cdemo_sk 15 | and hd_demo_sk = c_current_hdemo_sk 16 | and sr_cdemo_sk = cd_demo_sk 17 | order by c_customer_id 18 | LIMIT 100 19 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/18.sql: -------------------------------------------------------------------------------- 1 | select 2 | c.c_name, 3 | c.c_custkey, 4 | o.o_orderkey, 5 | o.o_orderdate, 6 | o.o_totalprice, 7 | sum(l.l_quantity) 8 | from 9 | "customer" c, 10 | "orders" o, 11 | "lineitem" l 12 | where 13 | o.o_orderkey in ( 14 | select 15 | l_orderkey 16 | from 17 | "lineitem" 18 | group by 19 | l_orderkey having 20 | sum(l_quantity) > 300 21 | ) 22 | and c.c_custkey = o.o_custkey 23 | and o.o_orderkey = l.l_orderkey 24 | group by 25 | c.c_name, 26 | c.c_custkey, 27 | o.o_orderkey, 28 | o.o_orderdate, 29 | o.o_totalprice 30 | order by 31 | o.o_totalprice desc, 32 | o.o_orderdate 33 | limit 100 34 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/extensions/functions_duplicate_urn2.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:com.domain:string 4 | 5 | scalar_functions: 6 | - name: "ltrim" 7 | description: "left trim from custom domain" 8 | impls: 9 | - args: 10 | - name: str 11 | value: string 12 | - name: chars 13 | value: string 14 | return: string 15 | - name: "concat" 16 | description: "concatenate strings from custom domain" 17 | impls: 18 | - args: 19 | - name: str1 20 | value: string 21 | - name: str2 22 | value: string 23 | variadic: 24 | min: 0 25 | return: string 26 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/07.sql: 
-------------------------------------------------------------------------------- 1 | SELECT I_ITEM_ID, 2 | AVG(SS_QUANTITY) AGG1, 3 | AVG(SS_LIST_PRICE) AGG2, 4 | AVG(SS_COUPON_AMT) AGG3, 5 | AVG(SS_SALES_PRICE) AGG4 6 | FROM STORE_SALES, CUSTOMER_DEMOGRAPHICS, DATE_DIM, ITEM, PROMOTION 7 | WHERE SS_SOLD_DATE_SK = D_DATE_SK AND 8 | SS_ITEM_SK = I_ITEM_SK AND 9 | SS_CDEMO_SK = CD_DEMO_SK AND 10 | SS_PROMO_SK = P_PROMO_SK AND 11 | CD_GENDER = 'F' AND 12 | CD_MARITAL_STATUS = 'W' AND 13 | CD_EDUCATION_STATUS = 'Primary' AND 14 | (P_CHANNEL_EMAIL = 'N' OR P_CHANNEL_EVENT = 'N') AND 15 | D_YEAR = 1998 16 | GROUP BY I_ITEM_ID 17 | ORDER BY I_ITEM_ID 18 | LIMIT 100 19 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/05.sql: -------------------------------------------------------------------------------- 1 | select 2 | n.n_name, 3 | sum(l.l_extendedprice * (1 - l.l_discount)) as revenue 4 | 5 | from 6 | "customer" c, 7 | "orders" o, 8 | "lineitem" l, 9 | "supplier" s, 10 | "nation" n, 11 | "region" r 12 | 13 | where 14 | c.c_custkey = o.o_custkey 15 | and l.l_orderkey = o.o_orderkey 16 | and l.l_suppkey = s.s_suppkey 17 | and c.c_nationkey = s.s_nationkey 18 | and s.s_nationkey = n.n_nationkey 19 | and n.n_regionkey = r.r_regionkey 20 | and r.r_name = 'EUROPE' 21 | and o.o_orderdate >= date '1997-01-01' 22 | and o.o_orderdate < date '1997-01-01' + interval '1' year 23 | group by 24 | n.n_name 25 | 26 | order by 27 | revenue desc 28 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/16.sql: -------------------------------------------------------------------------------- 1 | select 2 | p.p_brand, 3 | p.p_type, 4 | p.p_size, 5 | count(distinct ps.ps_suppkey) as supplier_cnt 6 | from 7 | "partsupp" ps, 8 | "part" p 9 | where 10 | p.p_partkey = ps.ps_partkey 11 | and p.p_brand <> 'Brand#21' 12 | and p.p_type not like 'MEDIUM 
PLATED%' 13 | and p.p_size in (38, 2, 8, 31, 44, 5, 14, 24) 14 | and ps.ps_suppkey not in ( 15 | select 16 | s.s_suppkey 17 | from 18 | "supplier" s 19 | where 20 | s.s_comment like '%Customer%Complaints%' 21 | ) 22 | group by 23 | p.p_brand, 24 | p.p_type, 25 | p.p_size 26 | order by 27 | supplier_cnt desc, 28 | p.p_brand, 29 | p.p_type, 30 | p.p_size 31 | -------------------------------------------------------------------------------- /.github/workflows/sanity.yml: -------------------------------------------------------------------------------- 1 | name: Sanity Check Environment 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | sanity_check: 8 | name: Sanity Check 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v6 12 | with: 13 | fetch-depth: 0 14 | - name: Validate credentials for Sonatype and Signing 15 | run: ./ci/release/sanity.sh 16 | env: 17 | SONATYPE_USER: ${{ secrets.SONATYPE_USER }} 18 | SONATYPE_PASSWORD: ${{ secrets.SONATYPE_PASSWORD }} 19 | SIGNING_KEY_ID: ${{ secrets.SIGNING_KEY_ID }} 20 | SIGNING_PASSWORD: ${{ secrets.SIGNING_PASSWORD }} 21 | SIGNING_KEY: ${{ secrets.SIGNING_KEY }} 22 | -------------------------------------------------------------------------------- /examples/isthmus-api/build.gradle.kts: -------------------------------------------------------------------------------- 1 | plugins { 2 | // Apply the application plugin to add support for building a CLI application in Java. 
3 | id("application") 4 | alias(libs.plugins.spotless) 5 | id("substrait.java-conventions") 6 | } 7 | 8 | repositories { mavenCentral() } 9 | 10 | dependencies { 11 | implementation(project(":isthmus")) 12 | implementation(libs.calcite.core) 13 | implementation(libs.calcite.server) 14 | } 15 | 16 | application { mainClass = "io.substrait.examples.IsthmusAppExamples" } 17 | 18 | tasks.named("test") { useJUnitPlatform() } 19 | 20 | java { toolchain { languageVersion.set(JavaLanguageVersion.of(17)) } } 21 | 22 | tasks.pmdMain { dependsOn(":core:shadowJar") } 23 | -------------------------------------------------------------------------------- /core/src/test/java/io/substrait/relation/SpecVersionTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertNotNull; 5 | 6 | import io.substrait.plan.Plan.Version; 7 | import java.util.Optional; 8 | import org.junit.jupiter.api.Test; 9 | 10 | class SpecVersionTest { 11 | @Test 12 | void testSubstraitVersionDefaultValues() { 13 | Version version = Version.DEFAULT_VERSION; 14 | 15 | assertNotNull(version.getMajor()); 16 | assertNotNull(version.getMinor()); 17 | assertNotNull(version.getPatch()); 18 | 19 | assertEquals(Optional.of("substrait-java"), version.getProducer()); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/22.sql: -------------------------------------------------------------------------------- 1 | select i_product_name 2 | ,i_brand 3 | ,i_class 4 | ,i_category 5 | ,avg(inv_quantity_on_hand) qoh 6 | from inventory 7 | ,date_dim 8 | ,item 9 | ,warehouse 10 | where inv_date_sk=d_date_sk 11 | and inv_item_sk=i_item_sk 12 | and inv_warehouse_sk = w_warehouse_sk 13 | and d_month_seq between 1220 and 1220 + 11 14 | group by 
rollup(i_product_name 15 | ,i_brand 16 | ,i_class 17 | ,i_category) 18 | order by qoh, i_product_name, i_brand, i_class, i_category 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/expression/AbstractFunctionInvocation.java: -------------------------------------------------------------------------------- 1 | package io.substrait.expression; 2 | 3 | import io.substrait.extension.SimpleExtension; 4 | import io.substrait.type.Type; 5 | import java.util.List; 6 | 7 | public abstract class AbstractFunctionInvocation { 8 | 9 | public abstract T declaration(); 10 | 11 | public abstract List arguments(); 12 | 13 | public abstract Expression.AggregationPhase aggregationPhase(); 14 | 15 | public abstract List sort(); 16 | 17 | public abstract Type outputType(); 18 | 19 | public Type getType() { 20 | return outputType(); 21 | } 22 | 23 | public abstract I invocation(); 24 | } 25 | -------------------------------------------------------------------------------- /isthmus-cli/src/test/script/tpch_smoke.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu -o pipefail 4 | 5 | parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) 6 | cd "${parent_path}/../../.." 
7 | CMD="${ISTHMUS:-build/native/nativeCompile/isthmus}" 8 | 9 | TPCH="../isthmus/src/test/resources/tpch" 10 | 11 | DDL=$(cat ${TPCH}/schema.sql) 12 | QUERY_FOLDER="${TPCH}/queries" 13 | 14 | for QUERY_NUM in {1..22}; do 15 | if [ "${QUERY_NUM}" -lt 10 ]; then 16 | QUERY=$(cat "${QUERY_FOLDER}/0${QUERY_NUM}.sql") 17 | else 18 | QUERY=$(cat "${QUERY_FOLDER}/${QUERY_NUM}.sql") 19 | fi 20 | 21 | echo "Processing tpc-h query ${QUERY_NUM}" 22 | echo "${QUERY}" 23 | "${CMD}" --create "${DDL}" -- "${QUERY}" 24 | done 25 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/LocalFiles.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.relation.files.FileOrFiles; 4 | import io.substrait.util.VisitationContext; 5 | import java.util.List; 6 | import org.immutables.value.Value; 7 | 8 | @Value.Immutable 9 | public abstract class LocalFiles extends AbstractReadRel { 10 | 11 | public abstract List getItems(); 12 | 13 | @Override 14 | public O accept( 15 | RelVisitor visitor, C context) throws E { 16 | return visitor.visit(this, context); 17 | } 18 | 19 | public static ImmutableLocalFiles.Builder builder() { 20 | return ImmutableLocalFiles.builder(); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/AbstractReadRel.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.expression.Expression; 4 | import io.substrait.type.NamedStruct; 5 | import io.substrait.type.Type; 6 | import java.util.Optional; 7 | 8 | public abstract class AbstractReadRel extends ZeroInputRel implements HasExtension { 9 | 10 | public abstract NamedStruct getInitialSchema(); 11 | 12 | public abstract Optional getFilter(); 13 | 14 | public abstract Optional 
getBestEffortFilter(); 15 | 16 | // TODO: 17 | // public abstract Optional 18 | 19 | @Override 20 | protected final Type.Struct deriveRecordType() { 21 | return getInitialSchema().struct(); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/physical/RoundRobinExchange.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation.physical; 2 | 3 | import io.substrait.relation.RelVisitor; 4 | import io.substrait.util.VisitationContext; 5 | import org.immutables.value.Value; 6 | 7 | @Value.Immutable 8 | public abstract class RoundRobinExchange extends AbstractExchangeRel { 9 | public abstract boolean getExact(); 10 | 11 | @Override 12 | public O accept( 13 | RelVisitor visitor, C context) throws E { 14 | return visitor.visit(this, context); 15 | } 16 | 17 | public static ImmutableRoundRobinExchange.Builder builder() { 18 | return ImmutableRoundRobinExchange.builder(); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /core/src/test/java/io/substrait/utils/StringHolderHandlingProtoExtensionConverter.java: -------------------------------------------------------------------------------- 1 | package io.substrait.utils; 2 | 3 | import com.google.protobuf.Any; 4 | import io.substrait.extension.AdvancedExtension.Enhancement; 5 | import io.substrait.extension.AdvancedExtension.Optimization; 6 | import io.substrait.extension.ProtoExtensionConverter; 7 | 8 | public class StringHolderHandlingProtoExtensionConverter extends ProtoExtensionConverter { 9 | @Override 10 | protected Enhancement enhancementFromAdvancedExtension(final Any any) { 11 | return StringHolder.fromProto(any); 12 | } 13 | 14 | @Override 15 | protected Optimization optimizationFromAdvancedExtension(final Any any) { 16 | return StringHolder.fromProto(any); 17 | } 18 | } 19 | 
-------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/11.sql: -------------------------------------------------------------------------------- 1 | select 2 | ps.ps_partkey, 3 | sum(ps.ps_supplycost * ps.ps_availqty) as "value" 4 | from 5 | "partsupp" ps, 6 | "supplier" s, 7 | "nation" n 8 | where 9 | ps.ps_suppkey = s.s_suppkey 10 | and s.s_nationkey = n.n_nationkey 11 | and n.n_name = 'JAPAN' 12 | group by 13 | ps.ps_partkey having 14 | sum(ps.ps_supplycost * ps.ps_availqty) > ( 15 | select 16 | sum(ps.ps_supplycost * ps.ps_availqty) * 0.0001000000 17 | from 18 | "partsupp" ps, 19 | "supplier" s, 20 | "nation" n 21 | where 22 | ps.ps_suppkey = s.s_suppkey 23 | and s.s_nationkey = n.n_nationkey 24 | and n.n_name = 'JAPAN' 25 | ) 26 | order by 27 | "value" desc 28 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/01.sql: -------------------------------------------------------------------------------- 1 | with customer_total_return as 2 | (select sr_customer_sk as ctr_customer_sk 3 | ,sr_store_sk as ctr_store_sk 4 | ,sum(SR_RETURN_AMT_INC_TAX) as ctr_total_return 5 | from store_returns 6 | ,date_dim 7 | where sr_returned_date_sk = d_date_sk 8 | and d_year =2001 9 | group by sr_customer_sk 10 | ,sr_store_sk) 11 | select c_customer_id 12 | from customer_total_return ctr1 13 | ,store 14 | ,customer 15 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 16 | from customer_total_return ctr2 17 | where ctr1.ctr_store_sk = ctr2.ctr_store_sk) 18 | and s_store_sk = ctr1.ctr_store_sk 19 | and s_state = 'distmember(fips_county, 61, 3)' 20 | and ctr1.ctr_customer_sk = c_customer_sk 21 | order by c_customer_id 22 | LIMIT 100 23 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/15.sql: 
-------------------------------------------------------------------------------- 1 | -- converted to CTE since DDL is not part of Substrait. 2 | with revenue0(supplier_no, total_revenue) as ( 3 | select 4 | l_suppkey, 5 | sum(l_extendedprice * (1 - l_discount)) 6 | from 7 | "lineitem" 8 | where 9 | l_shipdate >= date '1993-05-01' 10 | and l_shipdate < date '1993-05-01' + interval '3' month 11 | group by 12 | l_suppkey) 13 | 14 | select 15 | s.s_suppkey, 16 | s.s_name, 17 | s.s_address, 18 | s.s_phone, 19 | r.total_revenue 20 | from 21 | "supplier" s, 22 | revenue0 r 23 | where 24 | s.s_suppkey = r.supplier_no 25 | and r.total_revenue = ( 26 | select 27 | max(total_revenue) 28 | from 29 | revenue0 30 | ) 31 | order by 32 | s.s_suppkey 33 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/physical/TargetType.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation.physical; 2 | 3 | import org.immutables.value.Value; 4 | 5 | @Value.Enclosing 6 | public interface TargetType { 7 | 8 | @Value.Immutable 9 | abstract class Uri implements TargetType { 10 | public abstract String getUri(); 11 | 12 | public static ImmutableTargetType.Uri.Builder builder() { 13 | return ImmutableTargetType.Uri.builder(); 14 | } 15 | } 16 | 17 | @Value.Immutable 18 | abstract class Extended implements TargetType { 19 | public abstract com.google.protobuf.Any getExtended(); 20 | 21 | public static ImmutableTargetType.Extended.Builder builder() { 22 | return ImmutableTargetType.Extended.builder(); 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/86.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(ws_net_paid) as total_sum 3 | ,i_category 4 | ,i_class 5 | ,grouping(i_category)+grouping(i_class) as 
lochierarchy 6 | ,rank() over ( 7 | partition by grouping(i_category)+grouping(i_class), 8 | case when grouping(i_class) = 0 then i_category end 9 | order by sum(ws_net_paid) desc) as rank_within_parent 10 | from 11 | web_sales 12 | ,date_dim d1 13 | ,item 14 | where 15 | d1.d_month_seq between 1220 and 1220+11 16 | and d1.d_date_sk = ws_sold_date_sk 17 | and i_item_sk = ws_item_sk 18 | group by rollup(i_category,i_class) 19 | order by 20 | lochierarchy desc, 21 | case when lochierarchy = 0 then i_category end, 22 | rank_within_parent 23 | LIMIT 100 24 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/10.sql: -------------------------------------------------------------------------------- 1 | select 2 | c.c_custkey, 3 | c.c_name, 4 | sum(l.l_extendedprice * (1 - l.l_discount)) as revenue, 5 | c.c_acctbal, 6 | n.n_name, 7 | c.c_address, 8 | c.c_phone, 9 | c.c_comment 10 | from 11 | "customer" c, 12 | "orders" o, 13 | "lineitem" l, 14 | "nation" n 15 | where 16 | c.c_custkey = o.o_custkey 17 | and l.l_orderkey = o.o_orderkey 18 | and o.o_orderdate >= date '1994-03-01' 19 | and o.o_orderdate < date '1994-03-01' + interval '3' month 20 | and l.l_returnflag = 'R' 21 | and c.c_nationkey = n.n_nationkey 22 | group by 23 | c.c_custkey, 24 | c.c_name, 25 | c.c_acctbal, 26 | c.c_phone, 27 | n.n_name, 28 | c.c_address, 29 | c.c_comment 30 | order by 31 | revenue desc 32 | limit 20 33 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/26.sql: -------------------------------------------------------------------------------- 1 | select i_item_id, 2 | avg(cs_quantity) agg1, 3 | avg(cs_list_price) agg2, 4 | avg(cs_coupon_amt) agg3, 5 | avg(cs_sales_price) agg4 6 | from catalog_sales, customer_demographics, date_dim, item, promotion 7 | where cs_sold_date_sk = d_date_sk and 8 | cs_item_sk = i_item_sk and 9 | cs_bill_cdemo_sk = cd_demo_sk 
and 10 | cs_promo_sk = p_promo_sk and 11 | cd_gender = 'dist(gender, 1, 1)' and 12 | cd_marital_status = 'dist(marital_status, 1, 1)' and 13 | cd_education_status = 'dist(education, 1, 1)' and 14 | (p_channel_email = 'N' or p_channel_event = 'N') and 15 | d_year = 1998 16 | group by i_item_id 17 | order by i_item_id 18 | LIMIT 100 19 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/06.sql: -------------------------------------------------------------------------------- 1 | select a.ca_state state, count(*) cnt 2 | from customer_address a 3 | ,customer c 4 | ,store_sales s 5 | ,date_dim d 6 | ,item i 7 | where a.ca_address_sk = c.c_current_addr_sk 8 | and c.c_customer_sk = s.ss_customer_sk 9 | and s.ss_sold_date_sk = d.d_date_sk 10 | and s.ss_item_sk = i.i_item_sk 11 | and d.d_month_seq = 12 | (select distinct (d_month_seq) 13 | from date_dim 14 | where d_year = 1998 15 | and d_moy = 3 ) 16 | and i.i_current_price > 1.2 * 17 | (select avg(j.i_current_price) 18 | from item j 19 | where j.i_category = i.i_category) 20 | group by a.ca_state 21 | having count(*) >= 10 22 | order by cnt 23 | LIMIT 100 24 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/45.sql: -------------------------------------------------------------------------------- 1 | select ca_zip, ca_city, sum(ws_sales_price) 2 | from web_sales, customer, customer_address, date_dim, item 3 | where ws_bill_customer_sk = c_customer_sk 4 | and c_current_addr_sk = ca_address_sk 5 | and ws_item_sk = i_item_sk 6 | and ( substring(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') 7 | or 8 | i_item_id in (select i_item_id 9 | from item 10 | where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 11 | ) 12 | ) 13 | and ws_sold_date_sk = d_date_sk 14 | and d_qoy = 1 and d_year = 2001 15 | group by ca_zip, ca_city 16 | order by 
ca_zip, ca_city 17 | LIMIT 100 18 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/ExtensionLeaf.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import org.immutables.value.Value; 5 | 6 | @Value.Immutable 7 | public abstract class ExtensionLeaf extends ZeroInputRel { 8 | 9 | public abstract Extension.LeafRelDetail getDetail(); 10 | 11 | @Override 12 | public O accept( 13 | RelVisitor visitor, C context) throws E { 14 | return visitor.visit(this, context); 15 | } 16 | 17 | public static ImmutableExtensionLeaf.Builder from(Extension.LeafRelDetail detail) { 18 | return ImmutableExtensionLeaf.builder() 19 | .detail(detail) 20 | .deriveRecordType(detail.deriveRecordType()); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/19.sql: -------------------------------------------------------------------------------- 1 | select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, 2 | sum(ss_ext_sales_price) ext_price 3 | from date_dim, store_sales, item,customer,customer_address,store 4 | where d_date_sk = ss_sold_date_sk 5 | and ss_item_sk = i_item_sk 6 | and i_manager_id=28 7 | and d_moy=12 8 | and d_year=1998 9 | and ss_customer_sk = c_customer_sk 10 | and c_current_addr_sk = ca_address_sk 11 | and substring(ca_zip,1,5) <> substring(s_zip,1,5) 12 | and ss_store_sk = s_store_sk 13 | group by i_brand 14 | ,i_brand_id 15 | ,i_manufact_id 16 | ,i_manufact 17 | order by ext_price desc 18 | ,i_brand 19 | ,i_brand_id 20 | ,i_manufact_id 21 | ,i_manufact 22 | LIMIT 100 23 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java: 
-------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import io.substrait.extension.SimpleExtension; 4 | import io.substrait.relation.Rel; 5 | import org.apache.calcite.prepare.Prepare; 6 | import org.apache.calcite.rel.RelNode; 7 | 8 | public class SubstraitToSql extends SqlConverterBase { 9 | 10 | public SubstraitToSql() { 11 | super(FEATURES_DEFAULT); 12 | } 13 | 14 | public SubstraitToSql(SimpleExtension.ExtensionCollection extensions) { 15 | super(FEATURES_DEFAULT, extensions); 16 | } 17 | 18 | public RelNode substraitRelToCalciteRel(Rel relRoot, Prepare.CatalogReader catalog) { 19 | return SubstraitRelNodeConverter.convert( 20 | relRoot, relOptCluster, catalog, parserConfig, extensionCollection); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/12.sql: -------------------------------------------------------------------------------- 1 | select 2 | l.l_shipmode, 3 | sum(case 4 | when o.o_orderpriority = '1-URGENT' 5 | or o.o_orderpriority = '2-HIGH' 6 | then 1 7 | else 0 8 | end) as high_line_count, 9 | sum(case 10 | when o.o_orderpriority <> '1-URGENT' 11 | and o.o_orderpriority <> '2-HIGH' 12 | then 1 13 | else 0 14 | end) as low_line_count 15 | from 16 | "orders" o, 17 | "lineitem" l 18 | where 19 | o.o_orderkey = l.l_orderkey 20 | and l.l_shipmode in ('TRUCK', 'REG AIR') 21 | and l.l_commitdate < l.l_receiptdate 22 | and l.l_shipdate < l.l_commitdate 23 | and l.l_receiptdate >= date '1994-01-01' 24 | and l.l_receiptdate < date '1994-01-01' + interval '1' year 25 | group by 26 | l.l_shipmode 27 | order by 28 | l.l_shipmode 29 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/physical/SingleBucketExchange.java: -------------------------------------------------------------------------------- 1 | package 
io.substrait.relation.physical; 2 | 3 | import io.substrait.expression.Expression; 4 | import io.substrait.relation.RelVisitor; 5 | import io.substrait.util.VisitationContext; 6 | import org.immutables.value.Value; 7 | 8 | @Value.Immutable 9 | public abstract class SingleBucketExchange extends AbstractExchangeRel { 10 | public abstract Expression getExpression(); 11 | 12 | @Override 13 | public O accept( 14 | RelVisitor visitor, C context) throws E { 15 | return visitor.visit(this, context); 16 | } 17 | 18 | public static ImmutableSingleBucketExchange.Builder builder() { 19 | return ImmutableSingleBucketExchange.builder(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/UserTypeMapper.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import io.substrait.type.Type; 4 | import org.apache.calcite.rel.type.RelDataType; 5 | import org.jspecify.annotations.Nullable; 6 | 7 | /** Defines conversion of user-defined types between Substrait and Calcite */ 8 | public interface UserTypeMapper { 9 | /** 10 | * @param relDataType the Calcite {@link RelDataType} type to convert 11 | * @return the Substrait representation of the input type 12 | */ 13 | @Nullable Type toSubstrait(RelDataType relDataType); 14 | 15 | /** 16 | * @param type the Substrait {@link Type.UserDefined} type to convert 17 | * @return the Calcite {@link RelDataType} representing the input type 18 | */ 19 | @Nullable RelDataType toCalcite(Type.UserDefined type); 20 | } 21 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/physical/ScatterExchange.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation.physical; 2 | 3 | import io.substrait.expression.FieldReference; 4 | import
io.substrait.relation.RelVisitor; 5 | import io.substrait.util.VisitationContext; 6 | import java.util.List; 7 | import org.immutables.value.Value; 8 | 9 | @Value.Immutable 10 | public abstract class ScatterExchange extends AbstractExchangeRel { 11 | public abstract List getFields(); 12 | 13 | @Override 14 | public O accept( 15 | RelVisitor visitor, C context) throws E { 16 | return visitor.visit(this, context); 17 | } 18 | 19 | public static ImmutableScatterExchange.Builder builder() { 20 | return ImmutableScatterExchange.builder(); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /isthmus/src/test/java/io/substrait/isthmus/KeyConstraintsTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import io.substrait.isthmus.sql.SubstraitCreateStatementParser; 4 | import org.apache.calcite.prepare.Prepare; 5 | import org.junit.jupiter.params.ParameterizedTest; 6 | import org.junit.jupiter.params.provider.ValueSource; 7 | 8 | class KeyConstraintsTest extends PlanTestBase { 9 | 10 | @ParameterizedTest 11 | @ValueSource(ints = {7}) 12 | void tpcds(int query) throws Exception { 13 | SqlToSubstrait s = new SqlToSubstrait(); 14 | String values = asString("keyconstraints_schema.sql"); 15 | Prepare.CatalogReader catalog = 16 | SubstraitCreateStatementParser.processCreateStatementsToCatalog(values); 17 | s.convert(asString(String.format("tpcds/queries/%02d.sql", query)), catalog); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitTable.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.calcite; 2 | 3 | import org.apache.calcite.rel.type.RelDataType; 4 | import org.apache.calcite.rel.type.RelDataTypeFactory; 5 | import org.apache.calcite.schema.impl.AbstractTable; 6 | 7 | /** 
Basic {@link AbstractTable} implementation */ 8 | public class SubstraitTable extends AbstractTable { 9 | 10 | private final RelDataType rowType; 11 | private final String tableName; 12 | 13 | public SubstraitTable(String tableName, RelDataType rowType) { 14 | this.tableName = tableName; 15 | this.rowType = rowType; 16 | } 17 | 18 | public String getName() { 19 | return tableName; 20 | } 21 | 22 | @Override 23 | public RelDataType getRowType(RelDataTypeFactory typeFactory) { 24 | return rowType; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/32.sql: -------------------------------------------------------------------------------- 1 | select sum(cs_ext_discount_amt) as "excess discount amount" 2 | from 3 | catalog_sales 4 | ,item 5 | ,date_dim 6 | where 7 | i_manufact_id = 361 8 | and i_item_sk = cs_item_sk 9 | and d_date between '2000-01-27' and 10 | (cast('2000-01-27' as date) + interval '90' day) 11 | and d_date_sk = cs_sold_date_sk 12 | and cs_ext_discount_amt 13 | > ( 14 | select 15 | 1.3 * avg(cs_ext_discount_amt) 16 | from 17 | catalog_sales 18 | ,date_dim 19 | where 20 | cs_item_sk = i_item_sk 21 | and d_date between '2000-01-27' and 22 | (cast('2000-01-27' as date) + interval '90' day) 23 | and d_date_sk = cs_sold_date_sk 24 | ) 25 | LIMIT 100 26 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/27.sql: -------------------------------------------------------------------------------- 1 | select i_item_id, 2 | s_state, grouping(s_state) g_state, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | from store_sales, customer_demographics, date_dim, store, item 8 | where ss_sold_date_sk = d_date_sk and 9 | ss_item_sk = i_item_sk and 10 | ss_store_sk = s_store_sk and 11 | ss_cdemo_sk = cd_demo_sk and 12 | cd_gender = 'dist(gender, 
1, 1)' and 13 | cd_marital_status = 'dist(marital_status, 1, 1)' and 14 | cd_education_status = 'dist(education, 1, 1)' and 15 | d_year = 1998 and 16 | s_state in ('TN','TN', 'TN', 'TN', 'TN', 'TN') 17 | group by rollup (i_item_id, s_state) 18 | order by i_item_id 19 | ,s_state 20 | LIMIT 100 21 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/physical/MultiBucketExchange.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation.physical; 2 | 3 | import io.substrait.expression.Expression; 4 | import io.substrait.relation.RelVisitor; 5 | import io.substrait.util.VisitationContext; 6 | import org.immutables.value.Value; 7 | 8 | @Value.Immutable 9 | public abstract class MultiBucketExchange extends AbstractExchangeRel { 10 | public abstract Expression getExpression(); 11 | 12 | public abstract boolean getConstrainedToCount(); 13 | 14 | @Override 15 | public O accept( 16 | RelVisitor visitor, C context) throws E { 17 | return visitor.visit(this, context); 18 | } 19 | 20 | public static ImmutableMultiBucketExchange.Builder builder() { 21 | return ImmutableMultiBucketExchange.builder(); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | org.gradle.jvmargs=-XX:+UseG1GC -Xmx1g -XX:MaxMetaspaceSize=512m --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \ 2 | --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \ 3 | --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \ 4 | --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \ 5 | --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED 6 | org.gradle.parallel=true 7 | # Build cache can be disabled with --no-build-cache option 8 | org.gradle.caching=true 9 | 10 | #version that is 
going to be updated automatically by releases 11 | version = 0.74.0 12 | 13 | #signing 14 | SIGNING_KEY_ID = 193EAE47 15 | SIGNING_PASSWORD = password 16 | SIGNING_KEY = ALDqwcArqwfsdqweqwrwr 17 | 18 | #sonatype credentials 19 | MAVENCENTRAL_USERNAME = admin 20 | MAVENCENTRAL_PASSWORD = password 21 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/Filter.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.expression.Expression; 4 | import io.substrait.type.Type; 5 | import io.substrait.util.VisitationContext; 6 | import org.immutables.value.Value; 7 | 8 | @Value.Immutable 9 | public abstract class Filter extends SingleInputRel implements HasExtension { 10 | 11 | public abstract Expression getCondition(); 12 | 13 | @Override 14 | protected Type.Struct deriveRecordType() { 15 | return getInput().getRecordType(); 16 | } 17 | 18 | @Override 19 | public O accept( 20 | RelVisitor visitor, C context) throws E { 21 | return visitor.visit(this, context); 22 | } 23 | 24 | public static ImmutableFilter.Builder builder() { 25 | return ImmutableFilter.builder(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/92.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(ws_ext_discount_amt) as Excess_Discount_Amount 3 | from 4 | web_sales 5 | ,item 6 | ,date_dim 7 | where 8 | i_manufact_id = 350 9 | and i_item_sk = ws_item_sk 10 | and d_date between '2000-01-27' and 11 | (cast('2000-01-27' as date) + interval '90' day ) 12 | and d_date_sk = ws_sold_date_sk 13 | and ws_ext_discount_amt 14 | > ( 15 | SELECT 16 | 1.3 * avg(ws_ext_discount_amt) 17 | FROM 18 | web_sales 19 | ,date_dim 20 | WHERE 21 | ws_item_sk = i_item_sk 22 | and d_date between '2000-01-27' and 23 | 
(cast('2000-01-27' as date) + interval '90' day ) 24 | and d_date_sk = ws_sold_date_sk 25 | ) 26 | order by sum(ws_ext_discount_amt) 27 | limit 100; -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/93.sql: -------------------------------------------------------------------------------- 1 | select ss_customer_sk 2 | ,sum(act_sales) sumsales 3 | from (select ss_item_sk 4 | ,ss_ticket_number 5 | ,ss_customer_sk 6 | ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price 7 | else (ss_quantity*ss_sales_price) end act_sales 8 | from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk 9 | and sr_ticket_number = ss_ticket_number) 10 | ,reason 11 | where sr_reason_sk = r_reason_sk 12 | and r_reason_desc = 'dist(return_reasons, 1, 1)') t 13 | group by ss_customer_sk 14 | order by sumsales, ss_customer_sk 15 | LIMIT 100 16 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/09.sql: -------------------------------------------------------------------------------- 1 | select 2 | nation, 3 | o_year, 4 | sum(amount) as sum_profit 5 | from 6 | ( 7 | select 8 | n.n_name as nation, 9 | extract(year from o.o_orderdate) as o_year, 10 | l.l_extendedprice * (1 - l.l_discount) - ps.ps_supplycost * l.l_quantity as amount 11 | from 12 | "part" p, 13 | "supplier" s, 14 | "lineitem" l, 15 | "partsupp" ps, 16 | "orders" o, 17 | "nation" n 18 | where 19 | s.s_suppkey = l.l_suppkey 20 | and ps.ps_suppkey = l.l_suppkey 21 | and ps.ps_partkey = l.l_partkey 22 | and p.p_partkey = l.l_partkey 23 | and o.o_orderkey = l.l_orderkey 24 | and s.s_nationkey = n.n_nationkey 25 | and p.p_name like '%yellow%' 26 | ) as profit 27 | group by 28 | nation, 29 | o_year 30 | order by 31 | nation, 32 | o_year desc 33 | -------------------------------------------------------------------------------- 
/core/src/main/java/io/substrait/extension/ExtensionLookup.java: -------------------------------------------------------------------------------- 1 | package io.substrait.extension; 2 | 3 | /** 4 | * Interface with operations for resolving references to {@link 5 | * io.substrait.proto.SimpleExtensionDeclaration}s within an individual plan to their corresponding 6 | * functions or types. 7 | */ 8 | public interface ExtensionLookup { 9 | SimpleExtension.ScalarFunctionVariant getScalarFunction( 10 | int reference, SimpleExtension.ExtensionCollection extensions); 11 | 12 | SimpleExtension.WindowFunctionVariant getWindowFunction( 13 | int reference, SimpleExtension.ExtensionCollection extensions); 14 | 15 | SimpleExtension.AggregateFunctionVariant getAggregateFunction( 16 | int reference, SimpleExtension.ExtensionCollection extensions); 17 | 18 | SimpleExtension.Type getType(int reference, SimpleExtension.ExtensionCollection extensions); 19 | } 20 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/12.sql: -------------------------------------------------------------------------------- 1 | select i_item_id 2 | ,i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(ws_ext_sales_price) as itemrevenue 7 | ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from 10 | web_sales 11 | ,item 12 | ,date_dim 13 | where 14 | ws_item_sk = i_item_sk 15 | and i_category in ('Sports', 'Books', 'Home') 16 | and ws_sold_date_sk = d_date_sk 17 | and d_date between cast('1999-02-22' as date) 18 | and cast('1999-02-22' as date) + interval '30' days 19 | group by 20 | i_item_id 21 | ,i_item_desc 22 | ,i_category 23 | ,i_class 24 | ,i_current_price 25 | order by 26 | i_category 27 | ,i_class 28 | ,i_item_id 29 | ,i_item_desc 30 | ,revenueratio 31 | limit 100 -------------------------------------------------------------------------------- 
/isthmus/src/test/resources/tpcds/queries/20.sql: -------------------------------------------------------------------------------- 1 | select i_item_desc 2 | ,i_category 3 | ,i_class 4 | ,i_current_price 5 | ,sum(cs_ext_sales_price) as itemrevenue 6 | ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over 7 | (partition by i_class) as revenueratio 8 | from catalog_sales 9 | ,item 10 | ,date_dim 11 | where cs_item_sk = i_item_sk 12 | and i_category in ('Sports', 'Books', 'Home') 13 | and cs_sold_date_sk = d_date_sk 14 | and d_date between cast('1999-02-22' as date) 15 | and (cast('1999-02-22' as date) + interval '30' day) 16 | group by i_item_id 17 | ,i_item_desc 18 | ,i_category 19 | ,i_class 20 | ,i_current_price 21 | order by i_category 22 | ,i_class 23 | ,i_item_id 24 | ,i_item_desc 25 | ,revenueratio 26 | LIMIT 100 27 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/98.sql: -------------------------------------------------------------------------------- 1 | select i_item_id 2 | ,i_item_desc 3 | ,i_category 4 | ,i_class 5 | ,i_current_price 6 | ,sum(ss_ext_sales_price) as itemrevenue 7 | ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over 8 | (partition by i_class) as revenueratio 9 | from 10 | store_sales 11 | ,item 12 | ,date_dim 13 | where 14 | ss_item_sk = i_item_sk 15 | and trim(TRAILING from i_category) in ('Sports', 'Books', 'Home') 16 | and ss_sold_date_sk = d_date_sk 17 | and d_date between cast('1999-02-22' as date) 18 | and (cast('1999-02-22' as date) + interval '30' day ) 19 | group by 20 | i_item_id 21 | ,i_item_desc 22 | ,i_category 23 | ,i_class 24 | ,i_current_price 25 | order by 26 | i_category 27 | ,i_class 28 | ,i_item_id 29 | ,i_item_desc 30 | ,revenueratio -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/Fetch.java: 
-------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.type.Type; 4 | import io.substrait.util.VisitationContext; 5 | import java.util.OptionalLong; 6 | import org.immutables.value.Value; 7 | 8 | @Value.Immutable 9 | public abstract class Fetch extends SingleInputRel implements HasExtension { 10 | 11 | public abstract long getOffset(); 12 | 13 | public abstract OptionalLong getCount(); 14 | 15 | @Override 16 | protected Type.Struct deriveRecordType() { 17 | return getInput().getRecordType(); 18 | } 19 | 20 | @Override 21 | public O accept( 22 | RelVisitor visitor, C context) throws E { 23 | return visitor.visit(this, context); 24 | } 25 | 26 | public static ImmutableFetch.Builder builder() { 27 | return ImmutableFetch.builder(); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/ExtensionTable.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import org.immutables.value.Value; 5 | 6 | @Value.Immutable 7 | public abstract class ExtensionTable extends AbstractReadRel { 8 | 9 | public abstract Extension.ExtensionTableDetail getDetail(); 10 | 11 | @Override 12 | public O accept( 13 | RelVisitor visitor, C context) throws E { 14 | return visitor.visit(this, context); 15 | } 16 | 17 | public static ImmutableExtensionTable.Builder from(Extension.ExtensionTableDetail detail) { 18 | return ImmutableExtensionTable.builder().initialSchema(detail.deriveSchema()).detail(detail); 19 | } 20 | 21 | public static ImmutableExtensionTable.Builder builder() { 22 | return ImmutableExtensionTable.builder(); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/Sort.java: 
-------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.expression.Expression; 4 | import io.substrait.type.Type; 5 | import io.substrait.util.VisitationContext; 6 | import java.util.List; 7 | import org.immutables.value.Value; 8 | 9 | @Value.Immutable 10 | public abstract class Sort extends SingleInputRel implements HasExtension { 11 | 12 | public abstract List getSortFields(); 13 | 14 | @Override 15 | protected Type.Struct deriveRecordType() { 16 | return getInput().getRecordType(); 17 | } 18 | 19 | @Override 20 | public O accept( 21 | RelVisitor visitor, C context) throws E { 22 | return visitor.visit(this, context); 23 | } 24 | 25 | public static ImmutableSort.Builder builder() { 26 | return ImmutableSort.builder(); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /isthmus-cli/src/test/java/io/substrait/isthmus/cli/IsthmusEntryPointTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.cli; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | 5 | import org.junit.jupiter.api.Test; 6 | import picocli.CommandLine; 7 | 8 | class IsthmusEntryPointTest { 9 | 10 | @Test 11 | void canProcessQuery() { 12 | IsthmusEntryPoint isthmusEntryPoint = new IsthmusEntryPoint(); 13 | CommandLine cli = new CommandLine(isthmusEntryPoint); 14 | int statusCode = cli.execute("SELECT 1;"); 15 | assertEquals(0, statusCode); 16 | } 17 | 18 | @Test 19 | void canProcessQueryWithCreates() { 20 | IsthmusEntryPoint isthmusEntryPoint = new IsthmusEntryPoint(); 21 | CommandLine cli = new CommandLine(isthmusEntryPoint); 22 | int statusCode = cli.execute("SELECT * FROM foo", "--create", "CREATE TABLE foo(id INT)"); 23 | assertEquals(0, statusCode); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- 
/isthmus/src/test/resources/tpcds/queries/36.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin 3 | ,i_category 4 | ,i_class 5 | ,grouping(i_category)+grouping(i_class) as lochierarchy 6 | ,rank() over ( 7 | partition by grouping(i_category)+grouping(i_class), 8 | case when grouping(i_class) = 0 then i_category end 9 | order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent 10 | from 11 | store_sales 12 | ,date_dim d1 13 | ,item 14 | ,store 15 | where 16 | d1.d_year = 1998 17 | and d1.d_date_sk = ss_sold_date_sk 18 | and i_item_sk = ss_item_sk 19 | and s_store_sk = ss_store_sk 20 | and s_state in ('TN','TN','TN','TN', 'TN','TN','TN','TN') 21 | group by rollup(i_category,i_class) 22 | order by 23 | lochierarchy desc 24 | ,case when lochierarchy = 0 then i_category end 25 | ,rank_within_parent 26 | LIMIT 100 27 | -------------------------------------------------------------------------------- /spark/src/main/scala/io/substrait/spark/FileHolder.scala: -------------------------------------------------------------------------------- 1 | package io.substrait.spark 2 | 3 | import com.google.protobuf 4 | import io.substrait.extension.ExtensionLookup 5 | import io.substrait.relation.{ProtoRelConverter, RelProtoConverter} 6 | import io.substrait.relation.Extension.WriteExtensionObject 7 | import io.substrait.relation.files.FileOrFiles 8 | 9 | case class FileHolder(fileOrFiles: FileOrFiles) extends WriteExtensionObject { 10 | 11 | override def toProto(converter: RelProtoConverter): protobuf.Any = { 12 | protobuf.Any.pack(fileOrFiles.toProto) 13 | } 14 | } 15 | 16 | class FileHolderHandlingProtoRelConverter(lookup: ExtensionLookup) 17 | extends ProtoRelConverter(lookup) { 18 | override def detailFromWriteExtensionObject(any: protobuf.Any): WriteExtensionObject = { 19 | FileHolder( 20 | 
newFileOrFiles(any.unpack(classOf[io.substrait.proto.ReadRel.LocalFiles.FileOrFiles]))) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/20.sql: -------------------------------------------------------------------------------- 1 | select 2 | s.s_name, 3 | s.s_address 4 | from 5 | "supplier" s, 6 | "nation" n 7 | where 8 | s.s_suppkey in ( 9 | select 10 | ps.ps_suppkey 11 | from 12 | "partsupp" ps 13 | where 14 | ps. ps_partkey in ( 15 | select 16 | p.p_partkey 17 | from 18 | "part" p 19 | where 20 | p.p_name like 'antique%' 21 | ) 22 | and ps.ps_availqty > ( 23 | select 24 | 0.5 * sum(l.l_quantity) 25 | from 26 | "lineitem" l 27 | where 28 | l.l_partkey = ps.ps_partkey 29 | and l.l_suppkey = ps.ps_suppkey 30 | and l.l_shipdate >= date '1993-01-01' 31 | and l.l_shipdate < date '1993-01-01' + interval '1' year 32 | ) 33 | ) 34 | and s.s_nationkey = n.n_nationkey 35 | and n.n_name = 'KENYA' 36 | order by 37 | s.s_name 38 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlValidator.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.sql; 2 | 3 | import io.substrait.isthmus.calcite.SubstraitOperatorTable; 4 | import org.apache.calcite.prepare.Prepare; 5 | import org.apache.calcite.sql.SqlOperatorTable; 6 | import org.apache.calcite.sql.validate.SqlValidator; 7 | import org.apache.calcite.sql.validate.SqlValidatorImpl; 8 | 9 | public class SubstraitSqlValidator extends SqlValidatorImpl { 10 | 11 | static SqlValidator.Config CONFIG = Config.DEFAULT.withIdentifierExpansion(true); 12 | 13 | public SubstraitSqlValidator(Prepare.CatalogReader catalogReader) { 14 | super(SubstraitOperatorTable.INSTANCE, catalogReader, catalogReader.getTypeFactory(), CONFIG); 15 | } 16 | 17 | public SubstraitSqlValidator(Prepare.CatalogReader 
catalogReader, SqlOperatorTable opTable) { 18 | super(opTable, catalogReader, catalogReader.getTypeFactory(), CONFIG); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/21.sql: -------------------------------------------------------------------------------- 1 | select 2 | s.s_name, 3 | count(*) as numwait 4 | from 5 | "supplier" s, 6 | "lineitem" l1, 7 | "orders" o, 8 | "nation" n 9 | where 10 | s.s_suppkey = l1.l_suppkey 11 | and o.o_orderkey = l1.l_orderkey 12 | and o.o_orderstatus = 'F' 13 | and l1.l_receiptdate > l1.l_commitdate 14 | and exists ( 15 | select 16 | * 17 | from 18 | "lineitem" l2 19 | where 20 | l2.l_orderkey = l1.l_orderkey 21 | and l2.l_suppkey <> l1.l_suppkey 22 | ) 23 | and not exists ( 24 | select 25 | * 26 | from 27 | "lineitem" l3 28 | where 29 | l3.l_orderkey = l1.l_orderkey 30 | and l3.l_suppkey <> l1.l_suppkey 31 | and l3.l_receiptdate > l3.l_commitdate 32 | ) 33 | and s.s_nationkey = n.n_nationkey 34 | and n.n_name = 'BRAZIL' 35 | group by 36 | s.s_name 37 | order by 38 | numwait desc, 39 | s.s_name 40 | limit 100 41 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/22.sql: -------------------------------------------------------------------------------- 1 | select 2 | cntrycode, 3 | count(*) as numcust, 4 | sum(c_acctbal) as totacctbal 5 | from 6 | ( 7 | select 8 | substring(c_phone from 1 for 2) as cntrycode, 9 | c_acctbal 10 | from 11 | "customer" c 12 | where 13 | substring(c_phone from 1 for 2) in 14 | ('24', '31', '11', '16', '21', '20', '34') 15 | and c_acctbal > ( 16 | select 17 | avg(c_acctbal) 18 | from 19 | "customer" 20 | where 21 | c_acctbal > 0.00 22 | and substring(c_phone from 1 for 2) in 23 | ('24', '31', '11', '16', '21', '20', '34') 24 | ) 25 | and not exists ( 26 | select 27 | * 28 | from 29 | "orders" o 30 | where 31 | o.o_custkey = c.c_custkey 32 | ) 33 
| ) as custsale 34 | group by 35 | cntrycode 36 | order by 37 | cntrycode 38 | -------------------------------------------------------------------------------- /isthmus/src/test/java/io/substrait/isthmus/FetchTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import io.substrait.dsl.SubstraitBuilder; 4 | import io.substrait.relation.Rel; 5 | import io.substrait.type.TypeCreator; 6 | import java.util.List; 7 | import org.junit.jupiter.api.Test; 8 | 9 | class FetchTest extends PlanTestBase { 10 | 11 | static final TypeCreator R = TypeCreator.of(false); 12 | 13 | final SubstraitBuilder b = new SubstraitBuilder(extensions); 14 | 15 | final Rel TABLE = b.namedScan(List.of("test"), List.of("col1"), List.of(R.STRING)); 16 | 17 | @Test 18 | void limitOnly() { 19 | Rel rel = b.limit(50, TABLE); 20 | assertFullRoundTrip(rel); 21 | } 22 | 23 | @Test 24 | void offsetOnly() { 25 | Rel rel = b.offset(50, TABLE); 26 | assertFullRoundTrip(rel); 27 | } 28 | 29 | @Test 30 | void offsetAndLimit() { 31 | Rel rel = b.fetch(50, 10, TABLE); 32 | assertFullRoundTrip(rel); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/ExtensionSingle.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import org.immutables.value.Value; 5 | 6 | @Value.Immutable 7 | public abstract class ExtensionSingle extends SingleInputRel { 8 | 9 | public abstract Extension.SingleRelDetail getDetail(); 10 | 11 | @Override 12 | public O accept( 13 | RelVisitor visitor, C context) throws E { 14 | return visitor.visit(this, context); 15 | } 16 | 17 | public static ImmutableExtensionSingle.Builder from(Extension.SingleRelDetail detail, Rel input) { 18 | return ImmutableExtensionSingle.builder() 19 | .input(input) 20 | 
.detail(detail) 21 | .deriveRecordType(detail.deriveRecordType(input)); 22 | } 23 | 24 | public static ImmutableExtensionSingle.Builder builder() { 25 | return ImmutableExtensionSingle.builder(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/Cross.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.type.Type; 4 | import io.substrait.type.TypeCreator; 5 | import io.substrait.util.VisitationContext; 6 | import java.util.stream.Stream; 7 | import org.immutables.value.Value; 8 | 9 | @Value.Immutable 10 | public abstract class Cross extends BiRel implements HasExtension { 11 | 12 | @Override 13 | protected Type.Struct deriveRecordType() { 14 | return TypeCreator.REQUIRED.struct( 15 | Stream.concat( 16 | getLeft().getRecordType().fields().stream(), 17 | getRight().getRecordType().fields().stream())); 18 | } 19 | 20 | @Override 21 | public O accept( 22 | RelVisitor visitor, C context) throws E { 23 | return visitor.visit(this, context); 24 | } 25 | 26 | public static ImmutableCross.Builder builder() { 27 | return ImmutableCross.builder(); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/physical/AbstractExchangeRel.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation.physical; 2 | 3 | import io.substrait.relation.HasExtension; 4 | import io.substrait.relation.SingleInputRel; 5 | import io.substrait.type.Type; 6 | import java.util.List; 7 | import org.immutables.value.Value; 8 | 9 | public abstract class AbstractExchangeRel extends SingleInputRel implements HasExtension { 10 | public abstract Integer getPartitionCount(); 11 | 12 | public abstract List getTargets(); 13 | 14 | @Override 15 | protected Type.Struct 
deriveRecordType() { 16 | return getInput().getRecordType(); 17 | } 18 | 19 | @Value.Immutable 20 | public abstract static class ExchangeTarget { 21 | public abstract List getPartitionIds(); 22 | 23 | public abstract TargetType getType(); 24 | 25 | public static ImmutableExchangeTarget.Builder builder() { 26 | return ImmutableExchangeTarget.builder(); 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/65.sql: -------------------------------------------------------------------------------- 1 | select 2 | s_store_name, 3 | i_item_desc, 4 | sc.revenue, 5 | i_current_price, 6 | i_wholesale_cost, 7 | i_brand 8 | from store, item, 9 | (select ss_store_sk, avg(revenue) as ave 10 | from 11 | (select ss_store_sk, ss_item_sk, 12 | sum(ss_sales_price) as revenue 13 | from store_sales, date_dim 14 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1220 and 1220+11 15 | group by ss_store_sk, ss_item_sk) sa 16 | group by ss_store_sk) sb, 17 | (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue 18 | from store_sales, date_dim 19 | where ss_sold_date_sk = d_date_sk and d_month_seq between 1220 and 1220+11 20 | group by ss_store_sk, ss_item_sk) sc 21 | where sb.ss_store_sk = sc.ss_store_sk and 22 | sc.revenue <= 0.1 * sb.ave and 23 | s_store_sk = sc.ss_store_sk and 24 | i_item_sk = sc.ss_item_sk 25 | order by s_store_name, i_item_desc 26 | LIMIT 100 27 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/94.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(distinct ws_order_number) as "order count" 3 | ,sum(ws_ext_ship_cost) as "total shipping cost" 4 | ,sum(ws_net_profit) as "total net profit" 5 | from 6 | web_sales ws1 7 | ,date_dim 8 | ,customer_address 9 | ,web_site 10 | where 11 | d_date between '2001-5-01' and 12 | (cast('2001-5-01' 
as date) + interval '60' day) 13 | and ws1.ws_ship_date_sk = d_date_sk 14 | and ws1.ws_ship_addr_sk = ca_address_sk 15 | and ca_state = 'dist(fips_county,3,1)' 16 | and ws1.ws_web_site_sk = web_site_sk 17 | and web_company_name = 'pri' 18 | and exists (select * 19 | from web_sales ws2 20 | where ws1.ws_order_number = ws2.ws_order_number 21 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 22 | and not exists(select * 23 | from web_returns wr1 24 | where ws1.ws_order_number = wr1.wr_order_number) 25 | order by count(distinct ws_order_number) 26 | LIMIT 100 27 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/38.sql: -------------------------------------------------------------------------------- 1 | select count(*) from ( 2 | select distinct c_last_name, c_first_name, d_date 3 | from store_sales, date_dim, customer 4 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 5 | and store_sales.ss_customer_sk = customer.c_customer_sk 6 | and d_month_seq between 1220 and 1220 + 11 7 | intersect 8 | select distinct c_last_name, c_first_name, d_date 9 | from catalog_sales, date_dim, customer 10 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 11 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 12 | and d_month_seq between 1220 and 1220 + 11 13 | intersect 14 | select distinct c_last_name, c_first_name, d_date 15 | from web_sales, date_dim, customer 16 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 17 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 18 | and d_month_seq between 1220 and 1220 + 11 19 | ) hot_cust 20 | LIMIT 100 21 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/87.sql: -------------------------------------------------------------------------------- 1 | select count(*) 2 | from ((select distinct c_last_name, c_first_name, d_date 3 | from store_sales, date_dim, 
customer 4 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 5 | and store_sales.ss_customer_sk = customer.c_customer_sk 6 | and d_month_seq between 1220 and 1220+11) 7 | except 8 | (select distinct c_last_name, c_first_name, d_date 9 | from catalog_sales, date_dim, customer 10 | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 11 | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 12 | and d_month_seq between 1220 and 1220+11) 13 | except 14 | (select distinct c_last_name, c_first_name, d_date 15 | from web_sales, date_dim, customer 16 | where web_sales.ws_sold_date_sk = date_dim.d_date_sk 17 | and web_sales.ws_bill_customer_sk = customer.c_customer_sk 18 | and d_month_seq between 1220 and 1220+11) 19 | ) cool_cust 20 | 21 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/79.sql: -------------------------------------------------------------------------------- 1 | select 2 | c_last_name,c_first_name,substring(s_city,1,30),ss_ticket_number,amt,profit 3 | from 4 | (select ss_ticket_number 5 | ,ss_customer_sk 6 | ,store.s_city 7 | ,sum(ss_coupon_amt) amt 8 | ,sum(ss_net_profit) profit 9 | from store_sales,date_dim,store,household_demographics 10 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 11 | and store_sales.ss_store_sk = store.s_store_sk 12 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 13 | and (household_demographics.hd_dep_count = 0 or household_demographics.hd_vehicle_count > 0) 14 | and date_dim.d_dow = 1 15 | and date_dim.d_year in (1999,1999+1,1999+2) 16 | and store.s_number_employees between 200 and 295 17 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer 18 | where ss_customer_sk = c_customer_sk 19 | order by c_last_name,c_first_name,substring(s_city,1,30), profit 20 | LIMIT 100 21 | -------------------------------------------------------------------------------- 
/isthmus/src/test/resources/tpcds/queries/40.sql: -------------------------------------------------------------------------------- 1 | select 2 | w_state 3 | ,i_item_id 4 | ,sum(case when (cast(d_date as date) < cast ('2000-03-11' as date)) 5 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before 6 | ,sum(case when (cast(d_date as date) >= cast ('2000-03-11' as date)) 7 | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after 8 | from 9 | catalog_sales left outer join catalog_returns on 10 | (cs_order_number = cr_order_number 11 | and cs_item_sk = cr_item_sk) 12 | ,warehouse 13 | ,item 14 | ,date_dim 15 | where 16 | i_current_price between 0.99 and 1.49 17 | and i_item_sk = cs_item_sk 18 | and cs_warehouse_sk = w_warehouse_sk 19 | and cs_sold_date_sk = d_date_sk 20 | and d_date between (cast ('2000-03-11' as date) - interval '30' days) 21 | and (cast ('2000-03-11' as date) + interval '30' days) 22 | group by 23 | w_state,i_item_id 24 | order by w_state,i_item_id 25 | LIMIT 100 26 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/schema_error.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE LINEITEM ( 2 | L_ORDERKEY BIGINT NOT NULL, 3 | L_PARTKEY BIGINT NOT NULL, 4 | L_SUPPKEY BIGINT NOT NULL, 5 | L_LINENUMBER INTEGER, 6 | L_QUANTITY DECIMAL, 7 | L_EXTENDEDPRICE DECIMAL, 8 | L_DISCOUNT DECIMAL, 9 | L_TAX DECIMAL, 10 | L_RETURNFLAG CHAR(1), 11 | L_LINESTATUS CHAR(1), 12 | L_SHIPDATE DATE, 13 | L_COMMITDATE DATE, 14 | L_RECEIPTDATE DATE, 15 | L_SHIPINSTRUCT CHAR(25), 16 | L_SHIPMODE CHAR(10), 17 | L_COMMENT VARCHAR(44) 18 | ); 19 | CREATE TABLE LINEITEM_DUPLICATED ( 20 | L_ORDERKEY BIGINT NOT NULL, 21 | L_PARTKEY BIGINT NOT NULL, 22 | L_SUPPKEY BIGINT NOT NULL, 23 | L_LINENUMBER INTEGER, 24 | L_QUANTITY DECIMAL, 25 | L_EXTENDEDPRICE DECIMAL, 26 | L_DISCOUNT DECIMAL, 27 | L_TAX DECIMAL, 28 | 
L_RETURNFLAG CHAR(1), 29 | L_LINESTATUS CHAR(1), 30 | L_SHIPDATE DATE, 31 | L_COMMITDATE DATE, 32 | L_RECEIPTDATE DATE, 33 | L_SHIPINSTRUCT CHAR(25), 34 | L_SHIPMODE CHAR(10), 35 | L_COMMENT VARCHAR(44) 36 | ); 37 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/97.sql: -------------------------------------------------------------------------------- 1 | with ssci as ( 2 | select ss_customer_sk customer_sk 3 | ,ss_item_sk item_sk 4 | from store_sales,date_dim 5 | where ss_sold_date_sk = d_date_sk 6 | and d_month_seq between 1220 and 1220 + 11 7 | group by ss_customer_sk 8 | ,ss_item_sk), 9 | csci as( 10 | select cs_bill_customer_sk customer_sk 11 | ,cs_item_sk item_sk 12 | from catalog_sales,date_dim 13 | where cs_sold_date_sk = d_date_sk 14 | and d_month_seq between 1220 and 1220 + 11 15 | group by cs_bill_customer_sk 16 | ,cs_item_sk) 17 | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only 18 | ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only 19 | ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog 20 | from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk 21 | and ssci.item_sk = csci.item_sk) 22 | LIMIT 100 23 | -------------------------------------------------------------------------------- /core/src/test/resources/extensions/custom_extensions.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:test:custom_extensions 4 | types: 5 | - name: "customType1" 6 | description: >- 7 | some description 8 | variadic: true 9 | parameters: [] 10 | - name: "customType2" 11 | structure: 12 | field1: i32 13 | field2: fp32 14 | 15 | scalar_functions: 16 | - name: "scalar1" 17 | description: "a custom scalar functions" 18 | impls: 19 | - 
args: 20 | - name: arg1 21 | value: u!customType1 22 | return: i64 23 | - name: "scalar2" 24 | description: "a custom scalar functions" 25 | impls: 26 | - args: 27 | - name: arg1 28 | value: i64 29 | return: u!customType2 30 | - name: "array_index" 31 | description: "returns the element in the array at index, or NULL if index is out of bounds" 32 | impls: 33 | - args: 34 | - name: array 35 | value: list 36 | - name: index 37 | value: i64 38 | return: any1? 39 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/90.sql: -------------------------------------------------------------------------------- 1 | select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio 2 | from ( select count(*) amc 3 | from web_sales, household_demographics , time_dim, web_page 4 | where ws_sold_time_sk = time_dim.t_time_sk 5 | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk 6 | and ws_web_page_sk = web_page.wp_web_page_sk 7 | and time_dim.t_hour between 8 and 8+1 8 | and household_demographics.hd_dep_count = 0 9 | and web_page.wp_char_count between 5000 and 5200) at1, 10 | ( select count(*) pmc 11 | from web_sales, household_demographics , time_dim, web_page 12 | where ws_sold_time_sk = time_dim.t_time_sk 13 | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk 14 | and ws_web_page_sk = web_page.wp_web_page_sk 15 | and time_dim.t_hour between 17 and 17+1 16 | and household_demographics.hd_dep_count = 0 17 | and web_page.wp_char_count between 5000 and 5200) pt 18 | order by am_pm_ratio 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /spark/src/test/scala/io/substrait/spark/DateTimeSuite.scala: -------------------------------------------------------------------------------- 1 | package io.substrait.spark 2 | 3 | import org.apache.spark.SparkFunSuite 4 | import org.apache.spark.sql.test.SharedSparkSession 5 | 6 | class DateTimeSuite extends 
SparkFunSuite with SharedSparkSession with SubstraitPlanTestBase { 7 | 8 | override def beforeAll(): Unit = { 9 | super.beforeAll() 10 | sparkContext.setLogLevel("WARN") 11 | } 12 | 13 | test("date_add") { 14 | val qry = 15 | "select cast(d AS DATE) + interval 5 days from (values ('2025-03-27'), ('2025-01-02')) as table(d)" 16 | assertSqlSubstraitRelRoundTrip(qry) 17 | } 18 | 19 | test("date_sub") { 20 | val qry = 21 | "select cast(d AS DATE) - interval 5 days from (values ('2025-03-27'), ('2025-01-02')) as table(d)" 22 | assertSqlSubstraitRelRoundTrip(qry) 23 | } 24 | 25 | test("extract_year_month") { 26 | val qry = "select year(cast(d AS DATE)), month(cast(d AS DATE)) " + 27 | "from (values ('2025-03-27'), ('2025-01-02')) as table(d)" 28 | assertSqlSubstraitRelRoundTrip(qry) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /examples/substrait-spark/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | spark: 3 | image: docker.io/bitnami/spark:3.5 4 | user: ":${MY_GID}" 5 | environment: 6 | - SPARK_MODE=master 7 | - SPARK_RPC_AUTHENTICATION_ENABLED=no 8 | - SPARK_RPC_ENCRYPTION_ENABLED=no 9 | - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no 10 | - SPARK_SSL_ENABLED=no 11 | - SPARK_USER=spark 12 | ports: 13 | - '8080:8080' 14 | volumes: 15 | - ./_apps:/opt/spark-apps 16 | - ./_data:/opt/spark-data 17 | spark-worker: 18 | image: docker.io/bitnami/spark:3.5 19 | user: ":${MY_GID}" 20 | environment: 21 | - SPARK_MODE=worker 22 | - SPARK_MASTER_URL=spark://spark:7077 23 | - SPARK_WORKER_MEMORY=1G 24 | - SPARK_WORKER_CORES=1 25 | - SPARK_RPC_AUTHENTICATION_ENABLED=no 26 | - SPARK_RPC_ENCRYPTION_ENABLED=no 27 | - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no 28 | - SPARK_SSL_ENABLED=no 29 | - SPARK_USER=spark 30 | volumes: 31 | - ./_apps:/opt/spark-apps 32 | - ./_data:/opt/spark-data 33 | 
-------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/43.sql: -------------------------------------------------------------------------------- 1 | select s_store_name, s_store_id, 2 | sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, 3 | sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, 4 | sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, 5 | sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, 6 | sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, 7 | sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, 8 | sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales 9 | from date_dim, store_sales, store 10 | where d_date_sk = ss_sold_date_sk and 11 | s_store_sk = ss_store_sk and 12 | s_gmt_offset = -5 and 13 | d_year = 2001 14 | group by s_store_name, s_store_id 15 | order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales 16 | LIMIT 100 17 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/02.sql: -------------------------------------------------------------------------------- 1 | select 2 | s.s_acctbal, 3 | s.s_name, 4 | n.n_name, 5 | p.p_partkey, 6 | p.p_mfgr, 7 | s.s_address, 8 | s.s_phone, 9 | s.s_comment 10 | from 11 | "part" p, 12 | "supplier" s, 13 | "partsupp" ps, 14 | "nation" n, 15 | "region" r 16 | where 17 | p.p_partkey = ps.ps_partkey 18 | and s.s_suppkey = ps.ps_suppkey 19 | and p.p_size = 41 20 | and p.p_type like '%NICKEL' 21 | and s.s_nationkey = n.n_nationkey 22 | and n.n_regionkey = r.r_regionkey 23 | and r.r_name = 'EUROPE' 24 | and ps.ps_supplycost = ( 25 | 26 | select 27 | min(ps.ps_supplycost) 28 | 29 | from 30 | "partsupp" ps, 31 | "supplier" 
s, 32 | "nation" n, 33 | "region" r 34 | where 35 | p.p_partkey = ps.ps_partkey 36 | and s.s_suppkey = ps.ps_suppkey 37 | and s.s_nationkey = n.n_nationkey 38 | and n.n_regionkey = r.r_regionkey 39 | and r.r_name = 'EUROPE' 40 | ) 41 | 42 | order by 43 | s.s_acctbal desc, 44 | n.n_name, 45 | s.s_name, 46 | p.p_partkey 47 | limit 100 48 | -------------------------------------------------------------------------------- /examples/substrait-spark/src/main/java/io/substrait/examples/App.java: -------------------------------------------------------------------------------- 1 | package io.substrait.examples; 2 | 3 | /** Main class */ 4 | public final class App { 5 | 6 | /** Implemented by all examples */ 7 | public interface Action { 8 | 9 | /** 10 | * Run 11 | * 12 | * @param arg argument 13 | */ 14 | void run(String arg); 15 | } 16 | 17 | private App() {} 18 | 19 | /** 20 | * Traditional main method 21 | * 22 | * @param args string[] 23 | */ 24 | public static void main(String args[]) { 25 | try { 26 | 27 | if (args.length == 0) { 28 | args = new String[] {"SparkDataset"}; 29 | } 30 | String exampleClass = args[0]; 31 | 32 | Class clz = Class.forName(App.class.getPackageName() + "." 
+ exampleClass); 33 | Action action = (Action) clz.getDeclaredConstructor().newInstance(); 34 | 35 | if (args.length == 2) { 36 | action.run(args[1]); 37 | } else { 38 | action.run(null); 39 | } 40 | 41 | } catch (Exception e) { 42 | e.printStackTrace(); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/08.sql: -------------------------------------------------------------------------------- 1 | select 2 | o_year, 3 | sum(case 4 | when nation = 'EGYPT' then volume 5 | else 0 6 | end) / sum(volume) as mkt_share 7 | from 8 | ( 9 | select 10 | extract(year from o.o_orderdate) as o_year, 11 | l.l_extendedprice * (1 - l.l_discount) as volume, 12 | n2.n_name as nation 13 | from 14 | "part" p, 15 | "supplier" s, 16 | "lineitem" l, 17 | "orders" o, 18 | "customer" c, 19 | "nation" n1, 20 | "nation" n2, 21 | "region" r 22 | where 23 | p.p_partkey = l.l_partkey 24 | and s.s_suppkey = l.l_suppkey 25 | and l.l_orderkey = o.o_orderkey 26 | and o.o_custkey = c.c_custkey 27 | and c.c_nationkey = n1.n_nationkey 28 | and n1.n_regionkey = r.r_regionkey 29 | and r.r_name = 'MIDDLE EAST' 30 | and s.s_nationkey = n2.n_nationkey 31 | and o.o_orderdate between date '1995-01-01' and date '1996-12-31' 32 | and p.p_type = 'PROMO BRUSHED COPPER' 33 | ) as all_nations 34 | group by 35 | o_year 36 | order by 37 | o_year 38 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/07.sql: -------------------------------------------------------------------------------- 1 | select 2 | supp_nation, 3 | cust_nation, 4 | l_year, 5 | sum(volume) as revenue 6 | from 7 | ( 8 | select 9 | n1.n_name as supp_nation, 10 | n2.n_name as cust_nation, 11 | extract(year from l.l_shipdate) as l_year, 12 | l.l_extendedprice * (1 - l.l_discount) as volume 13 | from 14 | "supplier" s, 15 | "lineitem" l, 16 | "orders" o, 17 | "customer" c, 18 | "nation" 
n1, 19 | "nation" n2 20 | where 21 | s.s_suppkey = l.l_suppkey 22 | and o.o_orderkey = l.l_orderkey 23 | and c.c_custkey = o.o_custkey 24 | and s.s_nationkey = n1.n_nationkey 25 | and c.c_nationkey = n2.n_nationkey 26 | and ( 27 | (n1.n_name = 'EGYPT' and n2.n_name = 'UNITED STATES') 28 | or (n1.n_name = 'UNITED STATES' and n2.n_name = 'EGYPT') 29 | ) 30 | and l.l_shipdate between date '1995-01-01' and date '1996-12-31' 31 | ) as shipping 32 | group by 33 | supp_nation, 34 | cust_nation, 35 | l_year 36 | order by 37 | supp_nation, 38 | cust_nation, 39 | l_year 40 | -------------------------------------------------------------------------------- /core/src/test/java/io/substrait/type/proto/LiteralRoundtripTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.type.proto; 2 | 3 | import static io.substrait.expression.proto.ProtoExpressionConverter.EMPTY_TYPE; 4 | import static org.junit.jupiter.api.Assertions.assertEquals; 5 | 6 | import io.substrait.TestBase; 7 | import io.substrait.expression.ExpressionCreator; 8 | import io.substrait.expression.proto.ExpressionProtoConverter; 9 | import io.substrait.expression.proto.ProtoExpressionConverter; 10 | import io.substrait.util.EmptyVisitationContext; 11 | import java.math.BigDecimal; 12 | import org.junit.jupiter.api.Test; 13 | 14 | class LiteralRoundtripTest extends TestBase { 15 | 16 | @Test 17 | void decimal() { 18 | io.substrait.expression.Expression.DecimalLiteral val = 19 | ExpressionCreator.decimal(false, BigDecimal.TEN, 10, 2); 20 | ExpressionProtoConverter to = new ExpressionProtoConverter(null, null); 21 | ProtoExpressionConverter from = 22 | new ProtoExpressionConverter(null, null, EMPTY_TYPE, protoRelConverter); 23 | assertEquals(val, from.from(val.accept(to, EmptyVisitationContext.INSTANCE))); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- 
/isthmus/src/main/java/io/substrait/isthmus/expression/SubstraitFunctionMapping.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.expression; 2 | 3 | import io.substrait.extension.SimpleExtension.ScalarFunctionVariant; 4 | import java.util.List; 5 | import org.apache.calcite.rex.RexNode; 6 | 7 | /** 8 | * Associates operands with a matching Substrait function and possible matching function 9 | * implementations. 10 | */ 11 | class SubstraitFunctionMapping { 12 | 13 | private final String substraitName; 14 | private final List operands; 15 | private final List functions; 16 | 17 | public SubstraitFunctionMapping( 18 | final String substraitName, 19 | final List operands, 20 | final List functions) { 21 | this.substraitName = substraitName; 22 | this.operands = operands; 23 | this.functions = functions; 24 | } 25 | 26 | String substraitName() { 27 | return substraitName; 28 | } 29 | 30 | List operands() { 31 | return operands; 32 | } 33 | 34 | List functions() { 35 | return functions; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /isthmus/src/test/java/io/substrait/isthmus/DdlRoundtripTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import io.substrait.isthmus.sql.SubstraitCreateStatementParser; 4 | import org.apache.calcite.prepare.Prepare; 5 | import org.apache.calcite.sql.parser.SqlParseException; 6 | import org.junit.jupiter.api.Test; 7 | 8 | class DdlRoundtripTest extends PlanTestBase { 9 | final Prepare.CatalogReader catalogReader = 10 | SubstraitCreateStatementParser.processCreateStatementsToCatalog( 11 | "create table src1 (intcol int, charcol varchar(10))", 12 | "create table src2 (intcol int, charcol varchar(10))"); 13 | 14 | public DdlRoundtripTest() throws SqlParseException { 15 | super(); 16 | } 17 | 18 | @Test 19 | void testCreateTable() throws 
Exception { 20 | String sql = "create table dst1 as select * from src1"; 21 | assertFullRoundTripWithIdentityProjectionWorkaround(sql, catalogReader); 22 | } 23 | 24 | @Test 25 | void testCreateView() throws Exception { 26 | String sql = "create view dst1 as select * from src1"; 27 | assertFullRoundTripWithIdentityProjectionWorkaround(sql, catalogReader); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /examples/substrait-spark/src/main/java/io/substrait/examples/SparkHelper.java: -------------------------------------------------------------------------------- 1 | package io.substrait.examples; 2 | 3 | import org.apache.spark.sql.SparkSession; 4 | 5 | /** Collection of helper functions */ 6 | public final class SparkHelper { 7 | 8 | /** Vehicles table */ 9 | public static final String VEHICLE_TABLE = "vehicles"; 10 | 11 | /** Tests table (the vehicle safety tests) */ 12 | public static final String TESTS_TABLE = "tests"; 13 | 14 | /** Source data - csv */ 15 | public static final String VEHICLES_CSV = "vehicles_subset_2023.csv"; 16 | 17 | /** Source data - csv */ 18 | public static final String TESTS_CSV = "tests_subset_2023.csv"; 19 | 20 | /** In-container data location */ 21 | public static final String ROOT_DIR = "/opt/spark-data"; 22 | 23 | private SparkHelper() {} 24 | 25 | /** 26 | * Connects to the local Spark cluster 27 | * 28 | * @return SparkSession 29 | */ 30 | public static SparkSession connectLocalSpark() { 31 | 32 | SparkSession spark = SparkSession.builder().enableHiveSupport().getOrCreate(); 33 | 34 | spark.sparkContext().setLogLevel("ERROR"); 35 | 36 | return spark; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /spark/src/main/scala/org/apache/spark/substrait/SparkTypeUtil.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * 
contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.spark.substrait 18 | 19 | import org.apache.spark.sql.types.DataType 20 | 21 | object SparkTypeUtil { 22 | 23 | def sameType(left: DataType, right: DataType): Boolean = { 24 | left.sameType(right) 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/calcite/SubstraitSchema.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.calcite; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import org.apache.calcite.schema.Schema; 6 | import org.apache.calcite.schema.Table; 7 | import org.apache.calcite.schema.impl.AbstractSchema; 8 | 9 | /** Basic {@link AbstractSchema} implementation */ 10 | public class SubstraitSchema extends AbstractSchema { 11 | 12 | /** Map of table names to their associated tables */ 13 | protected final Map tableMap; 14 | 15 | /** Map of schema names to their associated schemas */ 16 | protected final Map schemaMap; 17 | 18 | public SubstraitSchema() { 19 | this.tableMap = new HashMap<>(); 20 | this.schemaMap = new HashMap<>(); 21 | } 22 | 23 | public SubstraitSchema(Map tableMap) { 24 | 
this.tableMap = tableMap; 25 | this.schemaMap = new HashMap<>(); 26 | } 27 | 28 | @Override 29 | public Map getTableMap() { 30 | return tableMap; 31 | } 32 | 33 | @Override 34 | protected Map getSubSchemaMap() { 35 | return schemaMap; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /spark/src/main/resources/spark.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | %YAML 1.2 16 | --- 17 | urn: extension:substrait:spark 18 | scalar_functions: 19 | - name: add 20 | description: >- 21 | Adds days to a date 22 | impls: 23 | - args: 24 | - name: start_date 25 | value: date 26 | - name: days 27 | value: i32 28 | return: date 29 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/21.sql: -------------------------------------------------------------------------------- 1 | select * 2 | from(select w_warehouse_name 3 | ,i_item_id 4 | ,sum(case when (cast(d_date as date) < cast ('2000-03-11' as date)) 5 | then inv_quantity_on_hand 6 | else 0 end) as inv_before 7 | ,sum(case when (cast(d_date as date) >= cast ('2000-03-11' as date)) 8 | then inv_quantity_on_hand 9 | else 0 end) as inv_after 10 | from inventory 11 | ,warehouse 12 | ,item 13 | ,date_dim 14 | where i_current_price between 0.99 and 1.49 15 | and i_item_sk = inv_item_sk 16 | and inv_warehouse_sk = w_warehouse_sk 17 | and inv_date_sk = d_date_sk 18 | and d_date between (cast ('2000-03-11' as date) - interval '30' day) 19 | and (cast ('2000-03-11' as date) + interval '30' day) 20 | group by w_warehouse_name, i_item_id) x 21 | where (case when inv_before > 0 22 | then inv_after / inv_before 23 | else null 24 | end) between 2.0/3.0 and 3.0/2.0 25 | order by w_warehouse_name 26 | ,i_item_id 27 | LIMIT 100 28 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/53.sql: -------------------------------------------------------------------------------- 1 | select * from 2 | (select i_manufact_id, 3 | sum(ss_sales_price) sum_sales, 4 | avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales 5 | from item, store_sales, date_dim, store 6 | where ss_item_sk = i_item_sk and 7 | ss_sold_date_sk = d_date_sk and 8 | ss_store_sk = s_store_sk and 9 | d_month_seq in 
(1220,1220+1,1220+2,1220+3,1220+4,1220+5,1220+6,1220+7,1220+8,1220+9,1220+10,1220+11) and 10 | ((i_category in ('Books','Children','Electronics') and 11 | i_class in ('personal','portable','reference','self-help') and 12 | i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', 13 | 'exportiunivamalg #9','scholaramalgamalg #9')) 14 | or(i_category in ('Women','Music','Men') and 15 | i_class in ('accessories','classical','fragrances','pants') and 16 | i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', 17 | 'importoamalg #1'))) 18 | group by i_manufact_id, d_qoy ) tmp1 19 | where case when avg_quarterly_sales > 0 20 | then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales 21 | else null end > 0.1 22 | order by avg_quarterly_sales, 23 | sum_sales, 24 | i_manufact_id 25 | LIMIT 100 26 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/91.sql: -------------------------------------------------------------------------------- 1 | select 2 | cc_call_center_id Call_Center, 3 | cc_name Call_Center_Name, 4 | cc_manager Manager, 5 | sum(cr_net_loss) Returns_Loss 6 | from 7 | call_center, 8 | catalog_returns, 9 | date_dim, 10 | customer, 11 | customer_address, 12 | customer_demographics, 13 | household_demographics 14 | where 15 | cr_call_center_sk = cc_call_center_sk 16 | and cr_returned_date_sk = d_date_sk 17 | and cr_returning_customer_sk= c_customer_sk 18 | and cd_demo_sk = c_current_cdemo_sk 19 | and hd_demo_sk = c_current_hdemo_sk 20 | and ca_address_sk = c_current_addr_sk 21 | and d_year = 1998 22 | and d_moy = 11 23 | and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') 24 | or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) 25 | and hd_buy_potential like 'Unknown%' 26 | and ca_gmt_offset = -7 27 | group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status 28 | order by sum(cr_net_loss) desc; 
-------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/Project.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.expression.Expression; 4 | import io.substrait.type.Type; 5 | import io.substrait.type.TypeCreator; 6 | import io.substrait.util.VisitationContext; 7 | import java.util.List; 8 | import java.util.stream.Stream; 9 | import org.immutables.value.Value; 10 | /** Relation whose record type is its input's fields followed by the types of its expressions. */ 11 | @Value.Immutable 12 | public abstract class Project extends SingleInputRel implements HasExtension { 13 | /** Expressions whose types are appended to the input's fields by {@link #deriveRecordType()}. */ 14 | public abstract List<Expression> getExpressions(); 15 | /** Concatenates the input's field types with each expression's type, reusing the input struct's nullability. */ 16 | @Override 17 | public Type.Struct deriveRecordType() { 18 | Type.Struct initial = getInput().getRecordType(); 19 | return TypeCreator.of(initial.nullable()) 20 | .struct( 21 | Stream.concat( 22 | initial.fields().stream(), getExpressions().stream().map(Expression::getType))); 23 | } 24 | /** Forwards to {@code visitor.visit(this, context)}. */ 25 | @Override 26 | public <O, C extends VisitationContext, E extends Exception> O accept( 27 | RelVisitor<O, C, E> visitor, C context) throws E { 28 | return visitor.visit(this, context); 29 | } 30 | /** Delegates to {@code ImmutableProject.builder()}. */ 31 | public static ImmutableProject.Builder builder() { 32 | return ImmutableProject.builder(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /isthmus/src/test/java/io/substrait/isthmus/LogarithmicFunctionTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import org.junit.jupiter.params.ParameterizedTest; 4 | import org.junit.jupiter.params.provider.ValueSource; 5 | 6 | class LogarithmicFunctionTest extends PlanTestBase { 7 | 8 | static String CREATES = 9 | "CREATE TABLE numbers (i8 TINYINT, i16 SMALLINT, i32 INT, i64 BIGINT, fp32 REAL, fp64 DOUBLE)"; 10 | 11 | @ParameterizedTest 12 | @ValueSource(strings = {"fp32", "fp64"}) 13 | void ln(String column) throws Exception { 14 | String query = String.format("SELECT ln(%s) FROM 
numbers", column); 15 | assertFullRoundTrip(query, CREATES); 16 | } 17 | 18 | @ParameterizedTest 19 | @ValueSource(strings = {"fp32", "fp64"}) 20 | void log10(String column) throws Exception { 21 | String query = String.format("SELECT log10(%s) FROM numbers", column); 22 | assertFullRoundTrip(query, CREATES); 23 | } 24 | 25 | @ParameterizedTest 26 | @ValueSource(strings = {"i64", "fp32", "fp64"}) 27 | void log2(String column) throws Exception { 28 | String query = String.format("SELECT log2(%s) FROM numbers", column); 29 | assertFullRoundTrip(query, CREATES); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/95.sql: -------------------------------------------------------------------------------- 1 | with ws_wh as 2 | (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 3 | from web_sales ws1,web_sales ws2 4 | where ws1.ws_order_number = ws2.ws_order_number 5 | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 6 | select 7 | count(distinct ws_order_number) as "order count" 8 | ,sum(ws_ext_ship_cost) as "total shipping cost" 9 | ,sum(ws_net_profit) as "total net profit" 10 | from 11 | web_sales ws1 12 | ,date_dim 13 | ,customer_address 14 | ,web_site 15 | where 16 | d_date between '2001-5-01' and 17 | (cast('2001-5-01' as date) + interval '60' day) 18 | and ws1.ws_ship_date_sk = d_date_sk 19 | and ws1.ws_ship_addr_sk = ca_address_sk 20 | and ca_state = 'dist(fips_county,3,1)' 21 | and ws1.ws_web_site_sk = web_site_sk 22 | and web_company_name = 'pri' 23 | and ws1.ws_order_number in (select ws_order_number 24 | from ws_wh) 25 | and ws1.ws_order_number in (select wr_order_number 26 | from web_returns,ws_wh 27 | where wr_order_number = ws_wh.ws_order_number) 28 | order by count(distinct ws_order_number) 29 | LIMIT 100 30 | -------------------------------------------------------------------------------- 
/isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateView.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.calcite.rel; 2 | 3 | import java.util.List; 4 | import org.apache.calcite.rel.AbstractRelNode; 5 | import org.apache.calcite.rel.RelNode; 6 | import org.apache.calcite.rel.RelWriter; 7 | import org.apache.calcite.rel.type.RelDataType; 8 | /** Calcite rel node that carries a view name together with a single input relation. */ 9 | public class CreateView extends AbstractRelNode { 10 | private final List<String> viewName; 11 | private final RelNode input; 12 | /** Reuses the input's cluster and trait set for this node. */ 13 | public CreateView(List<String> viewName, RelNode input) { 14 | super(input.getCluster(), input.getTraitSet()); 15 | this.viewName = viewName; 16 | this.input = input; 17 | } 18 | /** The row type of this node is the input's row type. */ 19 | @Override 20 | protected RelDataType deriveRowType() { 21 | return input.getRowType(); 22 | } 23 | /** Adds the input and the {@code viewName} item to the explain output. */ 24 | @Override 25 | public RelWriter explainTerms(RelWriter pw) { 26 | return super.explainTerms(pw).input("input", getInput()).item("viewName", getViewName()); 27 | } 28 | /** Returns the single input wrapped in a one-element list. */ 29 | @Override 30 | public List<RelNode> getInputs() { 31 | return List.of(input); 32 | } 33 | 34 | public List<String> getViewName() { 35 | return viewName; 36 | } 37 | 38 | public RelNode getInput() { 39 | return input; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/86a.sql: -------------------------------------------------------------------------------- 1 | with results as 2 | ( select sum(ws_net_paid) as total_sum, i_category, i_class, 0 as g_category, 0 as g_class 3 | from 4 | web_sales 5 | ,date_dim d1 6 | ,item 7 | where 8 | d1.d_month_seq between 1220 and 1220+11 9 | and d1.d_date_sk = ws_sold_date_sk 10 | and i_item_sk = ws_item_sk 11 | group by i_category,i_class 12 | ) , 13 | 14 | results_rollup as 15 | ( select total_sum ,i_category ,i_class, g_category, g_class, 0 as lochierarchy from results 16 | union 17 | select sum(total_sum) as total_sum, i_category, NULL as i_class, 0 as g_category, 
1 as g_class, 1 as lochierarchy from results group by i_category 18 | union 19 | select sum(total_sum) as total_sum, NULL as i_category, NULL as i_class, 1 as g_category, 1 as g_class, 2 as lochierarchy from results) 20 | select total_sum ,i_category ,i_class, lochierarchy 21 | ,rank() over ( 22 | partition by lochierarchy, 23 | case when g_class = 0 then i_category end 24 | order by total_sum desc) as rank_within_parent 25 | from 26 | results_rollup 27 | order by 28 | lochierarchy desc, 29 | case when lochierarchy = 0 then i_category end, 30 | rank_within_parent 31 | LIMIT 100 32 | -------------------------------------------------------------------------------- /spark/src/main/scala/io/substrait/spark/HasOutputStack.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package io.substrait.spark 18 | 19 | import scala.collection.mutable 20 | 21 | trait HasOutputStack[T] { 22 | private val outputStack = mutable.Stack[T]() 23 | def currentOutput: T = outputStack.top 24 | def pushOutput(e: T): Unit = outputStack.push(e) 25 | def popOutput(): T = outputStack.pop() 26 | } 27 | -------------------------------------------------------------------------------- /isthmus/src/test/java/io/substrait/isthmus/RoundingFunctionTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import org.junit.jupiter.params.ParameterizedTest; 4 | import org.junit.jupiter.params.provider.ValueSource; 5 | 6 | class RoundingFunctionTest extends PlanTestBase { 7 | 8 | static String CREATES = 9 | "CREATE TABLE numbers (i8 TINYINT, i16 SMALLINT, i32 INT, i64 BIGINT, fp32 REAL, fp64 DOUBLE)"; 10 | 11 | @ParameterizedTest 12 | @ValueSource(strings = {"fp32", "fp64"}) 13 | void ceil(String column) throws Exception { 14 | String query = String.format("SELECT ceil(%s) FROM numbers", column); 15 | assertFullRoundTrip(query, CREATES); 16 | } 17 | 18 | @ParameterizedTest 19 | @ValueSource(strings = {"fp32", "fp64"}) 20 | void floor(String column) throws Exception { 21 | String query = String.format("SELECT floor(%s) FROM numbers", column); 22 | assertFullRoundTrip(query, CREATES); 23 | } 24 | 25 | @ParameterizedTest 26 | @ValueSource(strings = {"i8", "i16", "i32", "i64", "fp32", "fp64"}) 27 | void round(String column) throws Exception { 28 | String query = String.format("SELECT round(%s, 2) FROM numbers", column); 29 | assertFullRoundTrip(query, CREATES); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/25.sql: -------------------------------------------------------------------------------- 1 | select 2 | i_item_id 3 | ,i_item_desc 4 | ,s_store_id 5 | ,s_store_name 6 | ,max(ss_net_profit) 
as store_sales_profit 7 | ,max(sr_net_loss) as store_returns_loss 8 | ,max(cs_net_profit) as catalog_sales_profit 9 | from 10 | store_sales 11 | ,store_returns 12 | ,catalog_sales 13 | ,date_dim d1 14 | ,date_dim d2 15 | ,date_dim d3 16 | ,store 17 | ,item 18 | where 19 | d1.d_moy = 4 20 | and d1.d_year = 1998 21 | and d1.d_date_sk = ss_sold_date_sk 22 | and i_item_sk = ss_item_sk 23 | and s_store_sk = ss_store_sk 24 | and ss_customer_sk = sr_customer_sk 25 | and ss_item_sk = sr_item_sk 26 | and ss_ticket_number = sr_ticket_number 27 | and sr_returned_date_sk = d2.d_date_sk 28 | and d2.d_moy between 4 and 10 29 | and d2.d_year = 1998 30 | and sr_customer_sk = cs_bill_customer_sk 31 | and sr_item_sk = cs_item_sk 32 | and cs_sold_date_sk = d3.d_date_sk 33 | and d3.d_moy between 4 and 10 34 | and d3.d_year = 1998 35 | group by 36 | i_item_id 37 | ,i_item_desc 38 | ,s_store_id 39 | ,s_store_name 40 | order by 41 | i_item_id 42 | ,i_item_desc 43 | ,s_store_id 44 | ,s_store_name 45 | LIMIT 100 46 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/calcite/rel/CreateTable.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.calcite.rel; 2 | 3 | import java.util.List; 4 | import org.apache.calcite.rel.AbstractRelNode; 5 | import org.apache.calcite.rel.RelNode; 6 | import org.apache.calcite.rel.RelWriter; 7 | import org.apache.calcite.rel.type.RelDataType; 8 | /** Calcite rel node that carries a table name together with a single input relation. */ 9 | public class CreateTable extends AbstractRelNode { 10 | 11 | private final List<String> tableName; 12 | private final RelNode input; 13 | /** Reuses the input's cluster and trait set for this node. */ 14 | public CreateTable(List<String> tableName, RelNode input) { 15 | super(input.getCluster(), input.getTraitSet()); 16 | 17 | this.tableName = tableName; 18 | this.input = input; 19 | } 20 | /** The row type of this node is the input's row type. */ 21 | @Override 22 | protected RelDataType deriveRowType() { 23 | return input.getRowType(); 24 | } 25 | /** Adds the input and the {@code tableName} item to the explain output. */ 26 | @Override 27 | public RelWriter explainTerms(RelWriter 
pw) { 28 | return super.explainTerms(pw).input("input", getInput()).item("tableName", getTableName()); 29 | } 30 | /** Returns the single input wrapped in a one-element list. */ 31 | @Override 32 | public List<RelNode> getInputs() { 33 | return List.of(input); 34 | } 35 | 36 | public List<String> getTableName() { 37 | return tableName; 38 | } 39 | 40 | public RelNode getInput() { 41 | return input; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/76.sql: -------------------------------------------------------------------------------- 1 | select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( 2 | SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price 3 | FROM store_sales, item, date_dim 4 | WHERE ss_addr_sk IS NULL 5 | AND ss_sold_date_sk=d_date_sk 6 | AND ss_item_sk=i_item_sk 7 | UNION ALL 8 | SELECT 'web' as channel, 'ws_bill_customer_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price 9 | FROM web_sales, item, date_dim 10 | WHERE ws_bill_customer_sk IS NULL 11 | AND ws_sold_date_sk=d_date_sk 12 | AND ws_item_sk=i_item_sk 13 | UNION ALL 14 | SELECT 'catalog' as channel, 'cs_bill_customer_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price 15 | FROM catalog_sales, item, date_dim 16 | WHERE cs_bill_customer_sk IS NULL 17 | AND cs_sold_date_sk=d_date_sk 18 | AND cs_item_sk=i_item_sk) foo 19 | GROUP BY channel, col_name, d_year, d_qoy, i_category 20 | ORDER BY channel, col_name, d_year, d_qoy, i_category 21 | LIMIT 100 22 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpch/queries/19.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(l.l_extendedprice* (1 - l.l_discount)) as revenue 3 | from 4 | "lineitem" l, 5 | "part" p 6 | where 7 | ( 8 | p.p_partkey = l.l_partkey 9 | and 
p.p_brand = 'Brand#41' 10 | and p.p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 11 | and l.l_quantity >= 2 and l.l_quantity <= 2 + 10 12 | and p.p_size between 1 and 5 13 | and l.l_shipmode in ('AIR', 'AIR REG') 14 | and l.l_shipinstruct = 'DELIVER IN PERSON' 15 | ) 16 | or 17 | ( 18 | p.p_partkey = l.l_partkey 19 | and p.p_brand = 'Brand#13' 20 | and p.p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') 21 | and l.l_quantity >= 14 and l.l_quantity <= 14 + 10 22 | and p.p_size between 1 and 10 23 | and l.l_shipmode in ('AIR', 'AIR REG') 24 | and l.l_shipinstruct = 'DELIVER IN PERSON' 25 | ) 26 | or 27 | ( 28 | p.p_partkey = l.l_partkey 29 | and p.p_brand = 'Brand#55' 30 | and p.p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') 31 | and l.l_quantity >= 23 and l.l_quantity <= 23 + 10 32 | and p.p_size between 1 and 15 33 | and l.l_shipmode in ('AIR', 'AIR REG') 34 | and l.l_shipinstruct = 'DELIVER IN PERSON' 35 | ) 36 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/16.sql: -------------------------------------------------------------------------------- 1 | select 2 | count(distinct cs_order_number) as "order count" 3 | ,sum(cs_ext_ship_cost) as "total shipping cost" 4 | ,sum(cs_net_profit) as "total net profit" 5 | from 6 | catalog_sales cs1 7 | ,date_dim 8 | ,customer_address 9 | ,call_center 10 | where 11 | d_date between '2001-5-01' and 12 | (cast('2001-5-01' as date) + interval '60' days) 13 | and cs1.cs_ship_date_sk = d_date_sk 14 | and cs1.cs_ship_addr_sk = ca_address_sk 15 | and ca_state = 'dist(fips_county,3,1)' 16 | and cs1.cs_call_center_sk = cc_call_center_sk 17 | and cc_county in ('distmember(fips_county, [COUNTYNUMBER.1], 2)','distmember(fips_county, [COUNTYNUMBER.2], 2)','distmember(fips_county, [COUNTYNUMBER.3], 2)','distmember(fips_county, [COUNTYNUMBER.4], 2)', 18 | 'distmember(fips_county, [COUNTYNUMBER.5], 2)' 19 | ) 20 | and exists (select * 
21 | from catalog_sales cs2 22 | where cs1.cs_order_number = cs2.cs_order_number 23 | and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) 24 | and not exists(select * 25 | from catalog_returns cr1 26 | where cs1.cs_order_number = cr1.cr_order_number) 27 | order by count(distinct cs_order_number) 28 | LIMIT 100 29 | -------------------------------------------------------------------------------- /spark/src/test/scala/io/substrait/spark/expression/PredicateSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package io.substrait.spark.expression 18 | 19 | import org.apache.spark.SparkFunSuite 20 | import org.apache.spark.sql.catalyst.expressions.{And, Literal} 21 | 22 | class PredicateSuite extends SparkFunSuite with SubstraitExpressionTestBase { 23 | 24 | test("And") { 25 | runTest("and:bool", And(Literal(true), Literal(false))) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/expression/ScalarFunctionMapper.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.expression; 2 | 3 | import io.substrait.expression.Expression; 4 | import io.substrait.expression.FunctionArg; 5 | import java.util.List; 6 | import java.util.Optional; 7 | import org.apache.calcite.rex.RexCall; 8 | 9 | /** 10 | * Provides custom conversion for a Calcite call to corresponding Substrait functions and arguments. 11 | */ 12 | interface ScalarFunctionMapper { 13 | 14 | /** 15 | * If the supplied call is applicable to this mapper, get the custom mapping to the corresponding 16 | * Substrait function. 17 | * 18 | * @param call a Calcite call. 19 | * @return a custom function mapping, or an empty Optional if no mapping exists. 20 | */ 21 | Optional toSubstrait(RexCall call); 22 | 23 | /** 24 | * If the supplied expression is applicable to this mapper, get the function arguments that should 25 | * be used for the Substrait function call. 26 | * 27 | * @param expression an expression. 28 | * @return a list of function arguments, or an empty Optional if no mapping exists. 
29 | */ 30 | Optional<List<FunctionArg>> getExpressionArguments( 31 | Expression.ScalarFunctionInvocation expression); 32 | } 33 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/FeatureBoard.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import org.apache.calcite.avatica.util.Casing; 4 | import org.immutables.value.Value; 5 | 6 | /** 7 | * A feature board is a collection of flags that are enabled or configurations that control the 8 | * handling of a request to convert query [batch] to Substrait plans. 9 | */ 10 | @Value.Immutable 11 | public abstract class FeatureBoard { 12 | 13 | /** 14 | * @return Calcite's identifier casing policy for unquoted identifiers. 15 | */ 16 | @Value.Default 17 | public Casing unquotedCasing() { 18 | return Casing.TO_UPPER; 19 | } 20 | 21 | /** 22 | * Controls whether to support dynamic user-defined functions (UDFs) during SQL to Substrait plan 23 | * conversion. 24 | * 25 | * <p>

When enabled, custom functions defined in extension YAML files are available for use in SQL 26 | * queries. These functions will be dynamically converted to SQL operators during plan conversion. 27 | * This feature must be explicitly enabled by users and is disabled by default. 28 | * 29 | * @return true if dynamic UDFs should be supported; false otherwise (default) 30 | */ 31 | @Value.Default 32 | public boolean allowDynamicUdfs() { 33 | return false; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /ci/release/dry_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # shellcheck shell=bash 3 | 4 | set -euo pipefail 5 | 6 | curdir="$PWD" 7 | worktree="$(mktemp -d)" 8 | branch="$(basename "$worktree")" 9 | 10 | git worktree add "$worktree" 11 | # Return to the starting directory, then remove the temporary worktree and its branch. 12 | function cleanup() { 13 | cd "$curdir" || exit 1 14 | git worktree remove "$worktree" 15 | git worktree prune 16 | git branch -D "$branch" 17 | } 18 | # Trap EXIT only: under `set -e` a failing command also exits the shell, so trapping ERR as well would run cleanup twice. 19 | trap cleanup EXIT 20 | 21 | cd "$worktree" || exit 1 22 | 23 | export GITHUB_REF="$branch" 24 | 25 | npx --yes \ 26 | -p semantic-release \ 27 | -p "@semantic-release/commit-analyzer" \ 28 | -p "@semantic-release/release-notes-generator" \ 29 | -p "@semantic-release/changelog" \ 30 | -p "@semantic-release/exec" \ 31 | -p "@semantic-release/git" \ 32 | -p "conventional-changelog-conventionalcommits" \ 33 | semantic-release \ 34 | --ci false \ 35 | --dry-run \ 36 | --preset conventionalcommits \ 37 | --plugins \ 38 | --analyze-commits "@semantic-release/commit-analyzer" \ 39 | --generate-notes "@semantic-release/release-notes-generator" \ 40 | --verify-conditions "@semantic-release/changelog,@semantic-release/exec,@semantic-release/git" \ 41 | --prepare "@semantic-release/changelog,@semantic-release/exec" \ 42 | --branches "$branch" \ 43 | --repository-url "file://$PWD" 44 | 
-------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/24.sql: -------------------------------------------------------------------------------- 1 | with ssales as 2 | (select c_last_name 3 | ,c_first_name 4 | ,s_store_name 5 | ,ca_state 6 | ,s_state 7 | ,i_color 8 | ,i_current_price 9 | ,i_manager_id 10 | ,i_units 11 | ,i_size 12 | ,sum(ss_net_profit) netpaid 13 | from store_sales 14 | ,store_returns 15 | ,store 16 | ,item 17 | ,customer 18 | ,customer_address 19 | where ss_ticket_number = sr_ticket_number 20 | and ss_item_sk = sr_item_sk 21 | and ss_customer_sk = c_customer_sk 22 | and ss_item_sk = i_item_sk 23 | and ss_store_sk = s_store_sk 24 | and c_birth_country = upper(ca_country) 25 | and s_zip = ca_zip 26 | and s_market_id=5 27 | group by c_last_name 28 | ,c_first_name 29 | ,s_store_name 30 | ,ca_state 31 | ,s_state 32 | ,i_color 33 | ,i_current_price 34 | ,i_manager_id 35 | ,i_units 36 | ,i_size) 37 | select c_last_name 38 | ,c_first_name 39 | ,s_store_name 40 | ,sum(netpaid) paid 41 | from ssales 42 | where i_color = '[COLOR.1]' 43 | group by c_last_name 44 | ,c_first_name 45 | ,s_store_name 46 | having sum(netpaid) > (select 0.05*avg(netpaid) 47 | from ssales) 48 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/ExtensionMulti.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.util.VisitationContext; 4 | import java.util.Arrays; 5 | import java.util.List; 6 | import java.util.stream.Collectors; 7 | import org.immutables.value.Value; 8 | /** Extension relation over a list of inputs, described by an {@link Extension.MultiRelDetail}. */ 9 | @Value.Immutable 10 | public abstract class ExtensionMulti extends AbstractRel { 11 | 12 | public abstract Extension.MultiRelDetail getDetail(); 13 | /** Forwards to {@code visitor.visit(this, context)}. */ 14 | @Override 15 | public <O, C extends VisitationContext, E extends Exception> O accept( 16 | RelVisitor<O, C, E> visitor, C context) throws E { 17 | return visitor.visit(this, context); 18 | 
} 19 | /** Varargs overload: collects {@code inputs} into a list and delegates to the list overload. */ 20 | public static ImmutableExtensionMulti.Builder from( 21 | Extension.MultiRelDetail detail, Rel... inputs) { 22 | return from(detail, Arrays.stream(inputs).collect(Collectors.toList())); 23 | } 24 | /** Starts a builder holding the inputs, the detail, and the record type the detail derives from those inputs. */ 25 | public static ImmutableExtensionMulti.Builder from( 26 | Extension.MultiRelDetail detail, List<Rel> inputs) { 27 | return ImmutableExtensionMulti.builder() 28 | .addAllInputs(inputs) 29 | .detail(detail) 30 | .deriveRecordType(detail.deriveRecordType(inputs)); 31 | } 32 | /** Delegates to {@code ImmutableExtensionMulti.builder()}. */ 33 | public static ImmutableExtensionMulti.Builder builder() { 34 | return ImmutableExtensionMulti.builder(); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /examples/substrait-spark/build.gradle.kts: -------------------------------------------------------------------------------- 1 | plugins { 2 | // Apply the application plugin to add support for building a CLI application in Java. 3 | id("java") 4 | alias(libs.plugins.spotless) 5 | id("substrait.java-conventions") 6 | } 7 | 8 | repositories { 9 | // Use Maven Central for resolving dependencies. 10 | mavenCentral() 11 | } 12 | 13 | dependencies { 14 | implementation(project(":spark")) 15 | 16 | // For a real Spark application, these would not be required since they would be in the Spark 17 | // server classpath 18 | runtimeOnly(libs.spark.core) 19 | runtimeOnly(libs.spark.hive) 20 | } 21 | 22 | tasks.jar { 23 | dependsOn(":spark:jar", ":core:jar", ":core:shadowJar") 24 | 25 | isZip64 = true 26 | exclude("META-INF/*.RSA") 27 | exclude("META-INF/*.SF") 28 | exclude("META-INF/*.DSA") 29 | 30 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE 31 | manifest.attributes["Main-Class"] = "io.substrait.examples.App" 32 | from(configurations.runtimeClasspath.get().map({ if (it.isDirectory) it else zipTree(it) })) 33 | } 34 | 35 | tasks.named<Test>("test") { 36 | // Use JUnit Platform for unit tests. 
37 | useJUnitPlatform() 38 | } 39 | 40 | java { toolchain { languageVersion.set(JavaLanguageVersion.of(17)) } } 41 | 42 | tasks.pmdMain { dependsOn(":core:shadowJar") } 43 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/70.sql: -------------------------------------------------------------------------------- 1 | select 2 | sum(ss_net_profit) as total_sum 3 | ,s_state 4 | ,s_county 5 | ,grouping(s_state)+grouping(s_county) as lochierarchy 6 | ,rank() over ( 7 | partition by grouping(s_state)+grouping(s_county), 8 | case when grouping(s_county) = 0 then s_state end 9 | order by sum(ss_net_profit) desc) as rank_within_parent 10 | from 11 | store_sales 12 | ,date_dim d1 13 | ,store 14 | where 15 | d1.d_month_seq between 1220 and 1220+11 16 | and d1.d_date_sk = ss_sold_date_sk 17 | and s_store_sk = ss_store_sk 18 | and s_state in 19 | ( select s_state 20 | from (select s_state as s_state, 21 | rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking 22 | from store_sales, store, date_dim 23 | where d_month_seq between 1220 and 1220+11 24 | and d_date_sk = ss_sold_date_sk 25 | and s_store_sk = ss_store_sk 26 | group by s_state 27 | ) tmp1 28 | where ranking <= 5 29 | ) 30 | group by rollup(s_state,s_county) 31 | order by 32 | lochierarchy desc 33 | ,case when lochierarchy = 0 then s_state end 34 | ,rank_within_parent 35 | LIMIT 100 36 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/62.sql: -------------------------------------------------------------------------------- 1 | select 2 | substring(w_warehouse_name,1,20) 3 | ,sm_type 4 | ,web_name 5 | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" 6 | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and 7 | (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days" 8 | ,sum(case when 
(ws_ship_date_sk - ws_sold_date_sk > 60) and 9 | (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days" 10 | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and 11 | (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days" 12 | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as ">120 days" 13 | from 14 | web_sales 15 | ,warehouse 16 | ,ship_mode 17 | ,web_site 18 | ,date_dim 19 | where 20 | d_month_seq between 1220 and 1220 + 11 21 | and ws_ship_date_sk = d_date_sk 22 | and ws_warehouse_sk = w_warehouse_sk 23 | and ws_ship_mode_sk = sm_ship_mode_sk 24 | and ws_web_site_sk = web_site_sk 25 | group by 26 | substring(w_warehouse_name,1,20) 27 | ,sm_type 28 | ,web_name 29 | order by substring(w_warehouse_name,1,20) 30 | ,sm_type 31 | ,web_name 32 | LIMIT 100 33 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/39.sql: -------------------------------------------------------------------------------- 1 | with inv as 2 | (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 3 | ,stdev,mean, case mean when 0 then null else stdev/mean end cov 4 | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy 5 | ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean 6 | from inventory 7 | ,item 8 | ,warehouse 9 | ,date_dim 10 | where inv_item_sk = i_item_sk 11 | and inv_warehouse_sk = w_warehouse_sk 12 | and inv_date_sk = d_date_sk 13 | and d_year =2001 14 | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo 15 | where case mean when 0 then 0 else stdev/mean end > 1) 16 | select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov 17 | ,inv2.w_warehouse_sk as w_warehouse_sk_2,inv2.i_item_sk as i_item_sk_2,inv2.d_moy as d_moy_2,inv2.mean as mean_2, inv2.cov as cov_2 18 | from inv inv1,inv inv2 19 | where inv1.i_item_sk = inv2.i_item_sk 20 | and inv1.w_warehouse_sk = 
inv2.w_warehouse_sk 21 | and inv1.d_moy=1 22 | and inv2.d_moy=1+1 23 | order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov 24 | ,d_moy_2,mean_2, cov_2 -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/99.sql: -------------------------------------------------------------------------------- 1 | select 2 | substring(w_warehouse_name,1,20) 3 | ,sm_type 4 | ,cc_name 5 | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" 6 | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and 7 | (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days" 8 | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and 9 | (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days" 10 | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and 11 | (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days" 12 | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as ">120 days" 13 | from 14 | catalog_sales 15 | ,warehouse 16 | ,ship_mode 17 | ,call_center 18 | ,date_dim 19 | where 20 | d_month_seq between 1220 and 1220 + 11 21 | and cs_ship_date_sk = d_date_sk 22 | and cs_warehouse_sk = w_warehouse_sk 23 | and cs_ship_mode_sk = sm_ship_mode_sk 24 | and cs_call_center_sk = cc_call_center_sk 25 | group by 26 | substring(w_warehouse_name,1,20) 27 | ,sm_type 28 | ,cc_name 29 | order by substring(w_warehouse_name,1,20) 30 | ,sm_type 31 | ,cc_name 32 | LIMIT 100 33 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/extensions/scalar_functions_custom.yaml: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | urn: extension:substrait:functions_custom 4 | scalar_functions: 5 | - name: "regexp_extract_custom" 6 | impls: 7 | - args: 8 | - name: "text" 9 | value: string 
10 | - name: "pattern" 11 | value: string 12 | return: string 13 | 14 | - name: "format_text" 15 | description: "Formats text based on a mode. The output is nullable if the input is." 16 | impls: 17 | - args: 18 | - name: "mode" 19 | value: string 20 | - name: "input_text" 21 | value: string 22 | return: string 23 | nullability: MIRROR 24 | 25 | - name: "system_property_get" 26 | description: "Safely gets a system property. Always returns a nullable string." 27 | impls: 28 | - args: 29 | - name: "property_name" 30 | value: string 31 | return: string? 32 | nullability: DECLARED_OUTPUT 33 | 34 | - name: "safe_divide_custom" 35 | description: "Performs division, returning NULL if the denominator is zero." 36 | impls: 37 | - args: 38 | - name: "numerator" 39 | value: i32 40 | - name: "denominator" 41 | value: i32 42 | return: fp32? 43 | nullability: DISCRETE 44 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/30.sql: -------------------------------------------------------------------------------- 1 | with customer_total_return as 2 | (select wr_returning_customer_sk as ctr_customer_sk 3 | ,ca_state as ctr_state, 4 | sum(wr_return_amt) as ctr_total_return 5 | from web_returns 6 | ,date_dim 7 | ,customer_address 8 | where wr_returned_date_sk = d_date_sk 9 | and d_year =2001 10 | and wr_returning_addr_sk = ca_address_sk 11 | group by wr_returning_customer_sk 12 | ,ca_state) 13 | select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag 14 | ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address 15 | ,c_last_review_date_sk,ctr_total_return 16 | from customer_total_return ctr1 17 | ,customer_address 18 | ,customer 19 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 20 | from customer_total_return ctr2 21 | where ctr1.ctr_state = ctr2.ctr_state) 22 | and ca_address_sk = c_current_addr_sk 23 | and ca_state = 'dist(fips_county, 3, 1)' 24 
| and ctr1.ctr_customer_sk = c_customer_sk 25 | order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag 26 | ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address 27 | ,c_last_review_date_sk,ctr_total_return 28 | LIMIT 100 29 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/67.sql: -------------------------------------------------------------------------------- 1 | select * 2 | from (select i_category 3 | ,i_class 4 | ,i_brand 5 | ,i_product_name 6 | ,d_year 7 | ,d_qoy 8 | ,d_moy 9 | ,s_store_id 10 | ,sumsales 11 | ,rank() over (partition by i_category order by sumsales desc) rk 12 | from (select i_category 13 | ,i_class 14 | ,i_brand 15 | ,i_product_name 16 | ,d_year 17 | ,d_qoy 18 | ,d_moy 19 | ,s_store_id 20 | ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales 21 | from store_sales 22 | ,date_dim 23 | ,store 24 | ,item 25 | where ss_sold_date_sk=d_date_sk 26 | and ss_item_sk=i_item_sk 27 | and ss_store_sk = s_store_sk 28 | and d_month_seq between 1220 and 1220+11 29 | group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 30 | where rk <= 100 31 | order by i_category 32 | ,i_class 33 | ,i_brand 34 | ,i_product_name 35 | ,d_year 36 | ,d_qoy 37 | ,d_moy 38 | ,s_store_id 39 | ,sumsales 40 | ,rk 41 | LIMIT 100 42 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/72.sql: -------------------------------------------------------------------------------- 1 | select i_item_desc 2 | ,w_warehouse_name 3 | ,d1.d_week_seq 4 | ,count(case when p_promo_sk is null then 1 else 0 end) no_promo 5 | ,count(case when p_promo_sk is not null then 1 else 0 end) promo 6 | ,count(*) total_cnt 7 | from catalog_sales 8 | join inventory on (cs_item_sk = inv_item_sk) 9 | join warehouse on (w_warehouse_sk=inv_warehouse_sk) 10 | join item on 
(i_item_sk = cs_item_sk) 11 | join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) 12 | join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) 13 | join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) 14 | join date_dim d2 on (inv_date_sk = d2.d_date_sk) 15 | join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) 16 | left outer join promotion on (cs_promo_sk=p_promo_sk) 17 | left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) 18 | where d1.d_week_seq = d2.d_week_seq 19 | and inv_quantity_on_hand < cs_quantity 20 | and d3.d_date > d1.d_date + interval '5' day 21 | and hd_buy_potential = '1001-5000' 22 | and d1.d_year = 1998 23 | and hd_buy_potential = '1001-5000' 24 | and cd_marital_status = 'D' 25 | and d1.d_year = 1998 26 | group by i_item_desc,w_warehouse_name,d1.d_week_seq 27 | order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq 28 | LIMIT 100 29 | -------------------------------------------------------------------------------- /isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlDialect.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.sql; 2 | 3 | import org.apache.calcite.rel.RelNode; 4 | import org.apache.calcite.rel.rel2sql.RelToSqlConverter; 5 | import org.apache.calcite.sql.SqlDialect; 6 | import org.apache.calcite.sql.SqlNode; 7 | import org.apache.calcite.sql.util.SqlString; 8 | 9 | /** 10 | * {@link SqlDialect} used by Isthmus for parsing 11 | * 12 | *

Intended primarily for internal testing 13 | */ 14 | public class SubstraitSqlDialect extends SqlDialect { 15 | 16 | public static SqlDialect.Context DEFAULT_CONTEXT = SqlDialect.EMPTY_CONTEXT; 17 | 18 | public static SqlDialect DEFAULT = new SubstraitSqlDialect(DEFAULT_CONTEXT); 19 | 20 | public static SqlString toSql(RelNode relNode) { 21 | RelToSqlConverter relToSql = new RelToSqlConverter(DEFAULT); 22 | SqlNode sqlNode = relToSql.visitRoot(relNode).asStatement(); 23 | return sqlNode.toSqlString( 24 | c -> 25 | c.withAlwaysUseParentheses(false) 26 | .withSelectListItemsOnSeparateLines(false) 27 | .withUpdateSetListNewline(false) 28 | .withIndentation(0)); 29 | } 30 | 31 | public SubstraitSqlDialect(Context context) { 32 | super(context); 33 | } 34 | 35 | @Override 36 | public boolean supportsApproxCountDistinct() { 37 | return true; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/18.sql: -------------------------------------------------------------------------------- 1 | select i_item_id, 2 | ca_country, 3 | ca_state, 4 | ca_county, 5 | avg( cast(cs_quantity as numeric(12,2))) agg1, 6 | avg( cast(cs_list_price as numeric(12,2))) agg2, 7 | avg( cast(cs_coupon_amt as numeric(12,2))) agg3, 8 | avg( cast(cs_sales_price as numeric(12,2))) agg4, 9 | avg( cast(cs_net_profit as numeric(12,2))) agg5, 10 | avg( cast(c_birth_year as numeric(12,2))) agg6, 11 | avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 12 | from catalog_sales, customer_demographics cd1, 13 | customer_demographics cd2, customer, customer_address, date_dim, item 14 | where cs_sold_date_sk = d_date_sk and 15 | cs_item_sk = i_item_sk and 16 | cs_bill_cdemo_sk = cd1.cd_demo_sk and 17 | cs_bill_customer_sk = c_customer_sk and 18 | cd1.cd_gender = 'dist(gender, 1, 1)' and 19 | cd1.cd_education_status = 'dist(education, 1, 1)' and 20 | c_current_cdemo_sk = cd2.cd_demo_sk and 21 | c_current_addr_sk = ca_address_sk 
and 22 | c_birth_month in (1,6,8,9,12,2) and 23 | d_year = 1998 and 24 | ca_state in ('MS','IN','ND' ,'OK','NM','VA','MS') 25 | group by rollup (i_item_id, ca_country, ca_state, ca_county) 26 | order by ca_country, 27 | ca_state, 28 | ca_county, 29 | i_item_id 30 | LIMIT 100 31 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/63.sql: -------------------------------------------------------------------------------- 1 | select * 2 | from (select i_manager_id 3 | ,sum(ss_sales_price) sum_sales 4 | ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales 5 | from item 6 | ,store_sales 7 | ,date_dim 8 | ,store 9 | where ss_item_sk = i_item_sk 10 | and ss_sold_date_sk = d_date_sk 11 | and ss_store_sk = s_store_sk 12 | and d_month_seq in (1220,1220+1,1220+2,1220+3,1220+4,1220+5,1220+6,1220+7,1220+8,1220+9,1220+10,1220+11) 13 | and (( i_category in ('Books','Children','Electronics') 14 | and i_class in ('personal','portable','refernece','self-help') 15 | and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', 16 | 'exportiunivamalg #9','scholaramalgamalg #9')) 17 | or( i_category in ('Women','Music','Men') 18 | and i_class in ('accessories','classical','fragrances','pants') 19 | and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', 20 | 'importoamalg #1'))) 21 | group by i_manager_id, d_moy) tmp1 22 | where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 23 | order by i_manager_id 24 | ,avg_monthly_sales 25 | ,sum_sales 26 | LIMIT 100 27 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/73.sql: -------------------------------------------------------------------------------- 1 | select c_last_name 2 | ,c_first_name 3 | ,c_salutation 4 | ,c_preferred_cust_flag 5 | ,ss_ticket_number 6 | ,cnt from 7 | (select 
ss_ticket_number 8 | ,ss_customer_sk 9 | ,count(*) cnt 10 | from store_sales,date_dim,store,household_demographics 11 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 12 | and store_sales.ss_store_sk = store.s_store_sk 13 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 14 | and date_dim.d_dom between 1 and 2 15 | and (household_demographics.hd_buy_potential = '1001-5000' or 16 | household_demographics.hd_buy_potential = '0-500') 17 | and household_demographics.hd_vehicle_count > 0 18 | and case when household_demographics.hd_vehicle_count > 0 then 19 | household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 20 | and date_dim.d_year in (1999,1999+1,1999+2) 21 | and store.s_county in ('distmember(fips_county, [COUNTYNUMBER.1], 2)','distmember(fips_county, [COUNTYNUMBER.2], 2)','distmember(fips_county, [COUNTYNUMBER.3], 2)','distmember(fips_county, [COUNTYNUMBER.4], 2)') 22 | group by ss_ticket_number,ss_customer_sk) dj,customer 23 | where ss_customer_sk = c_customer_sk 24 | and cnt between 1 and 5 25 | order by cnt desc 26 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/27a.sql: -------------------------------------------------------------------------------- 1 | with results as 2 | (select i_item_id, 3 | s_state, 0 as g_state, 4 | ss_quantity agg1, 5 | ss_list_price agg2, 6 | ss_coupon_amt agg3, 7 | ss_sales_price agg4 8 | from store_sales, customer_demographics, date_dim, store, item 9 | where ss_sold_date_sk = d_date_sk and 10 | ss_item_sk = i_item_sk and 11 | ss_store_sk = s_store_sk and 12 | ss_cdemo_sk = cd_demo_sk and 13 | cd_gender = 'dist(gender, 1, 1)' and 14 | cd_marital_status = 'dist(marital_status, 1, 1)' and 15 | cd_education_status = 'dist(education, 1, 1)' and 16 | d_year = 1998 and 17 | s_state in ('TN', 'TN', 'TN', 'TN', 'TN', 'TN') 18 | ) 19 | 20 | select i_item_id, s_state, g_state, agg1, agg2, agg3, agg4 21 
| from ( 22 | select i_item_id, s_state, 0 as g_state, avg(agg1) agg1, avg(agg2) agg2, avg(agg3) agg3, avg(agg4) agg4 from results 23 | group by i_item_id, s_state 24 | union all 25 | select i_item_id, NULL AS s_state, 1 AS g_state, avg(agg1) agg1, avg(agg2) agg2, avg(agg3) agg3, 26 | avg(agg4) agg4 from results 27 | group by i_item_id 28 | union all 29 | select NULL AS i_item_id, NULL as s_state, 1 as g_state, avg(agg1) agg1, avg(agg2) agg2, avg(agg3) agg3, 30 | avg(agg4) agg4 from results 31 | ) foo 32 | order by i_item_id, s_state 33 | LIMIT 100 34 | -------------------------------------------------------------------------------- /examples/substrait-spark/src/main/java/io/substrait/examples/util/FunctionArgStringify.java: -------------------------------------------------------------------------------- 1 | package io.substrait.examples.util; 2 | 3 | import io.substrait.expression.EnumArg; 4 | import io.substrait.expression.Expression; 5 | import io.substrait.expression.FunctionArg.FuncArgVisitor; 6 | import io.substrait.extension.SimpleExtension.Function; 7 | import io.substrait.type.Type; 8 | import io.substrait.util.EmptyVisitationContext; 9 | 10 | /** FunctionArgStringify produces a simple debug string for Function Arguments */ 11 | public class FunctionArgStringify extends ParentStringify 12 | implements FuncArgVisitor { 13 | 14 | public FunctionArgStringify(int indent) { 15 | super(indent); 16 | } 17 | 18 | @Override 19 | public String visitExpr(Function fnDef, int argIdx, Expression e, EmptyVisitationContext context) 20 | throws RuntimeException { 21 | return e.accept(new ExpressionStringify(indent + 1), context); 22 | } 23 | 24 | @Override 25 | public String visitType(Function fnDef, int argIdx, Type t, EmptyVisitationContext context) 26 | throws RuntimeException { 27 | return t.accept(new TypeStringify(indent)); 28 | } 29 | 30 | @Override 31 | public String visitEnumArg(Function fnDef, int argIdx, EnumArg e, EmptyVisitationContext context) 32 | throws 
RuntimeException { 33 | return e.toString(); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/81.sql: -------------------------------------------------------------------------------- 1 | with customer_total_return as 2 | (select cr_returning_customer_sk as ctr_customer_sk 3 | ,ca_state as ctr_state, 4 | sum(cr_return_amt_inc_tax) as ctr_total_return 5 | from catalog_returns 6 | ,date_dim 7 | ,customer_address 8 | where cr_returned_date_sk = d_date_sk 9 | and d_year =1998 10 | and cr_returning_addr_sk = ca_address_sk 11 | group by cr_returning_customer_sk 12 | ,ca_state ) 13 | select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name 14 | ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset 15 | ,ca_location_type,ctr_total_return 16 | from customer_total_return ctr1 17 | ,customer_address 18 | ,customer 19 | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 20 | from customer_total_return ctr2 21 | where ctr1.ctr_state = ctr2.ctr_state) 22 | and ca_address_sk = c_current_addr_sk 23 | and ca_state = 'dist(fips_county, 3, 1)' 24 | and ctr1.ctr_customer_sk = c_customer_sk 25 | order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name 26 | ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset 27 | ,ca_location_type,ctr_total_return 28 | LIMIT 100 29 | -------------------------------------------------------------------------------- /spark/src/test/spark-3.2/org/apache/spark/sql/DatasetUtil.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql 19 | 20 | import org.apache.spark.sql.catalyst.encoders.RowEncoder 21 | import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan 22 | 23 | object DatasetUtil { 24 | def fromLogicalPlan(sparkSession: SparkSession, logicalPlan: LogicalPlan): DataFrame = { 25 | sparkSession.withActive { 26 | val qe = sparkSession.sessionState.executePlan(logicalPlan) 27 | qe.assertAnalyzed() 28 | new Dataset[Row](qe, RowEncoder(qe.analyzed.schema)) 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/29.sql: -------------------------------------------------------------------------------- 1 | select 2 | i_item_id 3 | ,i_item_desc 4 | ,s_store_id 5 | ,s_store_name 6 | ,avg(ss_quantity) as store_sales_quantity 7 | ,avg(sr_return_quantity) as store_returns_quantity 8 | ,avg(cs_quantity) as catalog_sales_quantity 9 | from 10 | store_sales 11 | ,store_returns 12 | ,catalog_sales 13 | ,date_dim d1 14 | ,date_dim d2 15 | ,date_dim d3 16 | ,store 17 | ,item 18 | where 19 | d1.d_moy = 4 20 | and d1.d_year = 1998 21 | and d1.d_date_sk = ss_sold_date_sk 22 | and i_item_sk = ss_item_sk 23 | and s_store_sk = ss_store_sk 24 | and ss_customer_sk = sr_customer_sk 25 | and ss_item_sk = sr_item_sk 26 | and ss_ticket_number = sr_ticket_number 27 | and 
sr_returned_date_sk = d2.d_date_sk 28 | and d2.d_moy between 4 and 4 + 3 29 | and d2.d_year = 1998 30 | and sr_customer_sk = cs_bill_customer_sk 31 | and sr_item_sk = cs_item_sk 32 | and cs_sold_date_sk = d3.d_date_sk 33 | and d3.d_year in (1998,1998+1,1998+2) 34 | group by 35 | i_item_id 36 | ,i_item_desc 37 | ,s_store_id 38 | ,s_store_name 39 | order by 40 | i_item_id 41 | ,i_item_desc 42 | ,s_store_id 43 | ,s_store_name 44 | LIMIT 100 45 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/expression/AggregateFunctionInvocation.java: -------------------------------------------------------------------------------- 1 | package io.substrait.expression; 2 | 3 | import io.substrait.extension.SimpleExtension; 4 | import io.substrait.type.Type; 5 | import java.util.List; 6 | import org.immutables.value.Value; 7 | 8 | @Value.Immutable 9 | public abstract class AggregateFunctionInvocation { 10 | public abstract SimpleExtension.AggregateFunctionVariant declaration(); 11 | 12 | public abstract List arguments(); 13 | 14 | public abstract List options(); 15 | 16 | public abstract Expression.AggregationPhase aggregationPhase(); 17 | 18 | public abstract List sort(); 19 | 20 | public abstract Type outputType(); 21 | 22 | public Type getType() { 23 | return outputType(); 24 | } 25 | 26 | public abstract Expression.AggregationInvocation invocation(); 27 | 28 | /** 29 | * Validates that variadic arguments satisfy the parameter consistency requirement. When 30 | * CONSISTENT, all variadic arguments must have the same type (ignoring nullability). When 31 | * INCONSISTENT, arguments can have different types. 
#!/usr/bin/env bash
# shellcheck shell=bash
#
# Release sanity check: verifies that the Central Publisher API credentials and the GPG
# signing key/passphrase supplied through the environment are usable.
#
# Reads: MAVENCENTRAL_USERNAME, MAVENCENTRAL_PASSWORD, SIGNING_KEY (base64-encoded key),
#        SIGNING_KEY_ID, SIGNING_PASSWORD.

set -euo pipefail

# Combining the assignment with `export` masks the exit status of `tty`, so a missing
# terminal does not trip `set -e` here. Kept on purpose.
# shellcheck disable=SC2155
export GPG_TTY=$(tty)

echo "Validate Central Publisher API credentials."
# Strip newlines: some base64 implementations wrap long output, which would break the header.
BEARER=$(printf "%s:%s" "${MAVENCENTRAL_USERNAME}" "${MAVENCENTRAL_PASSWORD}" | base64 | tr -d '\n')
CODE=$(curl --request GET \
  'https://central.sonatype.com/api/v1/publisher/published?namespace=io.substrait&name=core&version=0.1.0' \
  --header 'accept: application/json' \
  --header "Authorization: Bearer ${BEARER}" \
  -sSL -w '%{http_code}' -o /dev/null)
if [[ "$CODE" =~ ^2 ]]; then
  echo "Central Publisher API credentials configured successfully."
else
  # NOTE(review): this branch only reports the problem; the script carries on and may still
  # exit 0. Confirm whether a failed credential check should fail the job.
  echo "Error to validate Central Publisher API credentials. Server returned HTTP code ${CODE}."
fi

echo "Validate Signing Private/Public Key."
echo "$SIGNING_KEY" | base64 --decode | gpg --batch --import
# Extract the first keygrip listed for the signing key.
KEYGRIP=$(gpg --with-keygrip --list-secret-keys "$SIGNING_KEY_ID" | sed -e '/^ *Keygrip *= */!d;s///;q')
echo "allow-preset-passphrase" >> ~/.gnupg/gpg-agent.conf
gpgconf --reload gpg-agent
"$(gpgconf --list-dirs libexecdir)/gpg-preset-passphrase" -c "$KEYGRIP" <<< "$SIGNING_PASSWORD"
# Test the signing pipeline directly in the `if`: under `set -e -o pipefail` a separate
# `$?` check is never reached on failure, so the error message could not be printed.
if echo "test_use_passphrase_from_cache" \
  | gpg -q --batch --status-fd 1 --sign --local-user "$SIGNING_KEY_ID" --passphrase-fd 0 > /dev/null; then
  echo "Public/Private Key Credentials configured successfully."
else
  echo "Error to validate Public/Private Key Credentials."
  # Keep the non-zero exit status that `set -e` previously produced for this failure.
  exit 1
fi
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package io.substrait.spark 18 | 19 | import io.substrait.relation 20 | import io.substrait.relation.AbstractRelVisitor 21 | import io.substrait.util.EmptyVisitationContext 22 | 23 | class DefaultRelVisitor[T] extends AbstractRelVisitor[T, EmptyVisitationContext, RuntimeException] { 24 | 25 | override def visitFallback(rel: relation.Rel, context: EmptyVisitationContext): T = 26 | throw new UnsupportedOperationException( 27 | s"Type ${rel.getClass.getCanonicalName}" + 28 | s" not handled by visitor type ${getClass.getCanonicalName}.") 29 | } 30 | -------------------------------------------------------------------------------- /core/src/test/java/io/substrait/TestBase.java: -------------------------------------------------------------------------------- 1 | package io.substrait; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | 5 | import io.substrait.dsl.SubstraitBuilder; 6 | import io.substrait.extension.DefaultExtensionCatalog; 7 | import io.substrait.extension.ExtensionCollector; 8 | import io.substrait.extension.SimpleExtension; 9 | import io.substrait.relation.ProtoRelConverter; 10 | import io.substrait.relation.Rel; 11 | import io.substrait.relation.RelProtoConverter; 12 | import io.substrait.type.TypeCreator; 13 | 14 | public abstract class TestBase { 15 | 16 | protected static final SimpleExtension.ExtensionCollection 
defaultExtensionCollection = 17 | DefaultExtensionCatalog.DEFAULT_COLLECTION; 18 | 19 | protected TypeCreator R = TypeCreator.REQUIRED; 20 | protected TypeCreator N = TypeCreator.NULLABLE; 21 | 22 | protected SubstraitBuilder b = new SubstraitBuilder(defaultExtensionCollection); 23 | protected ExtensionCollector functionCollector = new ExtensionCollector(); 24 | protected RelProtoConverter relProtoConverter = new RelProtoConverter(functionCollector); 25 | protected ProtoRelConverter protoRelConverter = 26 | new ProtoRelConverter(functionCollector, defaultExtensionCollection); 27 | 28 | protected void verifyRoundTrip(Rel rel) { 29 | io.substrait.proto.Rel protoRel = relProtoConverter.toProto(rel); 30 | Rel relReturned = protoRelConverter.from(protoRel); 31 | assertEquals(rel, relReturned); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /isthmus/src/test/java/io/substrait/isthmus/DmlRoundtripTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import io.substrait.isthmus.sql.SubstraitCreateStatementParser; 4 | import org.apache.calcite.prepare.Prepare; 5 | import org.apache.calcite.sql.parser.SqlParseException; 6 | import org.junit.jupiter.api.Test; 7 | 8 | class DmlRoundtripTest extends PlanTestBase { 9 | 10 | final Prepare.CatalogReader catalogReader = 11 | SubstraitCreateStatementParser.processCreateStatementsToCatalog( 12 | "create table src1 (intcol int, charcol varchar(10))", 13 | "create table src2 (intcol int, charcol varchar(10))"); 14 | 15 | public DmlRoundtripTest() throws SqlParseException {} 16 | 17 | @Test 18 | void testDelete() throws SqlParseException { 19 | assertFullRoundTripWithIdentityProjectionWorkaround( 20 | "delete from src1 where intcol=10", catalogReader); 21 | } 22 | 23 | @Test 24 | void testUpdate() throws SqlParseException { 25 | assertFullRoundTripWithIdentityProjectionWorkaround( 26 | "update src1 set 
intcol=10 where charcol='a'", catalogReader); 27 | } 28 | 29 | @Test 30 | void testInsert() throws SqlParseException { 31 | assertFullRoundTripWithIdentityProjectionWorkaround( 32 | "insert into src1 (intcol, charcol) values (1,'a'); ", catalogReader); 33 | assertFullRoundTripWithIdentityProjectionWorkaround( 34 | "insert into src1 (intcol, charcol) select intcol,charcol from src2;", catalogReader); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/89.sql: -------------------------------------------------------------------------------- 1 | select * 2 | from( 3 | select i_category, i_class, i_brand, 4 | s_store_name, s_company_name, 5 | d_moy, 6 | sum(ss_sales_price) sum_sales, 7 | avg(sum(ss_sales_price)) over 8 | (partition by i_category, i_brand, s_store_name, s_company_name) 9 | avg_monthly_sales 10 | from item, store_sales, date_dim, store 11 | where ss_item_sk = i_item_sk and 12 | ss_sold_date_sk = d_date_sk and 13 | ss_store_sk = s_store_sk and 14 | d_year in (1998) and 15 | ((i_category in ('distmember(categories, [IDX.1], 1)','distmember(categories, [IDX.2], 1)','distmember(categories, [IDX.3], 1)') and 16 | i_class in ('DIST(distmember(categories, [IDX.1], 2), 1, 1)','DIST(distmember(categories, [IDX.2], 2), 1, 1)','DIST(distmember(categories, [IDX.3], 2), 1, 1)') 17 | ) 18 | or (i_category in ('distmember(categories, [IDX.4], 1)','distmember(categories, [IDX.5], 1)','distmember(categories, [IDX.6], 1)') and 19 | i_class in ('DIST(distmember(categories, [IDX.4], 2), 1, 1)','DIST(distmember(categories, [IDX.5], 2), 1, 1)','DIST(distmember(categories, [IDX.6], 2), 1, 1)') 20 | )) 21 | group by i_category, i_class, i_brand, 22 | s_store_name, s_company_name, d_moy) tmp1 23 | where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 24 | order by sum_sales - avg_monthly_sales, s_store_name 25 | LIMIT 100 
/** Small general-purpose helpers: a memoizing {@link Supplier} wrapper and a half-open int range. */
public class Util {

  /**
   * Wraps {@code supplier} so that its value is fetched on the first {@code get()} and the same
   * value is returned by every later call.
   *
   * <p>The returned supplier performs no synchronization. If the wrapped supplier throws, nothing
   * is cached and the next {@code get()} calls it again. A {@code null} result is cached like any
   * other value.
   *
   * @param supplier the supplier to call on first use
   * @param <T> type of the supplied value
   * @return a supplier that calls {@code supplier} until one call completes normally, then reuses
   *     that result
   */
  public static <T> Supplier<T> memoize(Supplier<T> supplier) {
    return new Memoizer<>(supplier);
  }

  /** Caches the first value obtained from its delegate. */
  private static class Memoizer<T> implements Supplier<T> {

    // A separate flag (rather than a null check on value) lets a null result be cached too.
    private boolean retrieved;
    private T value;
    private final Supplier<T> delegate;

    public Memoizer(Supplier<T> delegate) {
      this.delegate = delegate;
    }

    @Override
    public T get() {
      if (!retrieved) {
        value = delegate.get();
        // Set only after the delegate returned normally, so a throwing delegate is retried.
        retrieved = true;
      }
      return value;
    }
  }

  /** Immutable integer range with an inclusive start and an exclusive end. */
  public static class IntRange {
    private final int startInclusive;
    private final int endExclusive;

    /**
     * Creates a range covering {@code [startInclusive, endExclusive)}. The bounds are stored as
     * given; no ordering check is performed.
     *
     * @param startInclusive first value inside the range
     * @param endExclusive first value past the end of the range
     * @return the new range
     */
    public static IntRange of(int startInclusive, int endExclusive) {
      return new IntRange(startInclusive, endExclusive);
    }

    private IntRange(int startInclusive, int endExclusive) {
      this.startInclusive = startInclusive;
      this.endExclusive = endExclusive;
    }

    /** @return the inclusive lower bound */
    public int getStartInclusive() {
      return startInclusive;
    }

    /** @return the exclusive upper bound */
    public int getEndExclusive() {
      return endExclusive;
    }

    /**
     * Tests membership in the range.
     *
     * @param val value to test
     * @return {@code true} when {@code startInclusive <= val < endExclusive}
     */
    public boolean within(int val) {
      return val >= startInclusive && val < endExclusive;
    }
  }
}
,date_dim d1 11 | ,item 12 | ,store 13 | where 14 | d1.d_year = 1998 15 | and d1.d_date_sk = ss_sold_date_sk 16 | and i_item_sk = ss_item_sk 17 | and s_store_sk = ss_store_sk 18 | and s_state in ('TN','TN','TN','TN', 'TN','TN','TN','TN') 19 | group by i_category,i_class) 20 | , 21 | results_rollup as 22 | (select gross_margin ,i_category ,i_class,0 as t_category, 0 as t_class, 0 as lochierarchy from results 23 | union 24 | select sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin, 25 | i_category, NULL AS i_class, 0 as t_category, 1 as t_class, 1 as lochierarchy from results group by i_category 26 | union 27 | select sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin, 28 | NULL AS i_category ,NULL AS i_class, 1 as t_category, 1 as t_class, 2 as lochierarchy from results) 29 | select 30 | gross_margin ,i_category ,i_class, lochierarchy,rank() over ( 31 | partition by lochierarchy, case when t_class = 0 then i_category end 32 | order by gross_margin asc) as rank_within_parent 33 | from results_rollup 34 | order by 35 | lochierarchy desc 36 | ,case when lochierarchy = 0 then i_category end 37 | ,rank_within_parent 38 | LIMIT 100 39 | -------------------------------------------------------------------------------- /examples/isthmus-api/src/main/java/io/substrait/examples/SchemaHelper.java: -------------------------------------------------------------------------------- 1 | package io.substrait.examples; 2 | 3 | import io.substrait.isthmus.calcite.SubstraitTable; 4 | import io.substrait.isthmus.sql.SubstraitCreateStatementParser; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | import org.apache.calcite.jdbc.CalciteSchema; 8 | import org.apache.calcite.prepare.CalciteCatalogReader; 9 | import org.apache.calcite.sql.parser.SqlParseException; 10 | 11 | /** Helper functions for schemas. 
*/ 12 | public final class SchemaHelper { 13 | 14 | private SchemaHelper() {} 15 | 16 | /** 17 | * Parses one or more SQL strings containing only CREATE statements into a {@link 18 | * CalciteCatalogReader} 19 | * 20 | * @param createStatements a SQL string containing only CREATE statements 21 | * @return a {@link CalciteCatalogReader} generated from the CREATE statements 22 | * @throws SqlParseException 23 | */ 24 | public static CalciteSchema processCreateStatementsToSchema(final List createStatements) 25 | throws SqlParseException { 26 | 27 | final List tables = new ArrayList<>(); 28 | for (final String statement : createStatements) { 29 | tables.addAll(SubstraitCreateStatementParser.processCreateStatements(statement)); 30 | } 31 | 32 | final CalciteSchema rootSchema = CalciteSchema.createRootSchema(false); 33 | for (final SubstraitTable table : tables) { 34 | rootSchema.add(table.getName(), table); 35 | } 36 | 37 | return rootSchema; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /examples/isthmus-api/src/main/java/io/substrait/examples/IsthmusAppExamples.java: -------------------------------------------------------------------------------- 1 | package io.substrait.examples; 2 | 3 | import java.util.Arrays; 4 | 5 | /** Main class */ 6 | public final class IsthmusAppExamples { 7 | 8 | /** Implemented by all examples */ 9 | @FunctionalInterface 10 | public interface Action { 11 | 12 | /** 13 | * Run 14 | * 15 | * @param args String [] 16 | */ 17 | void run(String[] args); 18 | } 19 | 20 | private IsthmusAppExamples() {} 21 | 22 | /** 23 | * Traditional main method 24 | * 25 | * @param args string[] 26 | */ 27 | @SuppressWarnings("unchecked") 28 | public static void main(final String args[]) { 29 | try { 30 | 31 | if (args.length == 0) { 32 | System.err.println( 33 | "Please provide base classname of example to run. 
eg ToSql to run class io.substrait.examples.ToSql "); 34 | System.exit(-1); 35 | } 36 | final String exampleClass = args[0]; 37 | 38 | final Class clz = 39 | (Class) 40 | Class.forName( 41 | String.format("%s.%s", IsthmusAppExamples.class.getPackageName(), exampleClass)); 42 | final Action action = clz.getDeclaredConstructor().newInstance(); 43 | if (args.length == 1) { 44 | action.run(new String[] {}); 45 | } else { 46 | action.run(Arrays.copyOfRange(args, 1, args.length)); 47 | } 48 | } catch (Exception e) { 49 | e.printStackTrace(); 50 | System.exit(-1); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /isthmus/src/test/java/io/substrait/isthmus/EmptyArrayLiteralTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import io.substrait.dsl.SubstraitBuilder; 4 | import io.substrait.expression.Expression.EmptyListLiteral; 5 | import io.substrait.expression.ExpressionCreator; 6 | import io.substrait.relation.Project; 7 | import io.substrait.relation.Rel; 8 | import io.substrait.type.Type; 9 | import io.substrait.type.TypeCreator; 10 | import java.util.List; 11 | import org.junit.jupiter.api.Test; 12 | 13 | class EmptyArrayLiteralTest extends PlanTestBase { 14 | private static final TypeCreator N = TypeCreator.of(true); 15 | 16 | private final SubstraitBuilder b = new SubstraitBuilder(extensions); 17 | 18 | @Test 19 | void emptyArrayLiteral() { 20 | Type colType = N.I8; 21 | EmptyListLiteral emptyListLiteral = ExpressionCreator.emptyList(false, N.I8); 22 | Project rel = 23 | b.project( 24 | input -> List.of(emptyListLiteral), 25 | Rel.Remap.offset(1, 1), 26 | b.namedScan(List.of("t"), List.of("col"), List.of(colType))); 27 | assertFullRoundTrip(rel); 28 | } 29 | 30 | @Test 31 | void nullableEmptyArrayLiteral() { 32 | Type colType = N.I8; 33 | EmptyListLiteral emptyListLiteral = ExpressionCreator.emptyList(true, N.I8); 34 | Project 
rel = 35 | b.project( 36 | input -> List.of(emptyListLiteral), 37 | Rel.Remap.offset(1, 1), 38 | b.namedScan(List.of("t"), List.of("col"), List.of(colType))); 39 | assertFullRoundTrip(rel); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /bom/build.gradle.kts: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | plugins { 18 | `maven-publish` 19 | `java-platform` 20 | } 21 | 22 | val String.v: String get() = rootProject.extra["$this.version"] as String 23 | 24 | javaPlatform { 25 | allowDependencies() 26 | } 27 | 28 | dependencies { 29 | api(platform(libs.jackson.bom)) 30 | 31 | // Parenthesis are needed here: https://github.com/gradle/gradle/issues/9248 32 | (constraints) { 33 | // api means "the dependency is for both compilation and runtime" 34 | // runtime means "the dependency is only for runtime, not for compilation" 35 | // In other words, marking dependency as "runtime" would avoid accidental 36 | // dependency on it during compilation 37 | 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/46.sql: -------------------------------------------------------------------------------- 1 | select c_last_name 2 | ,c_first_name 3 | ,ca_city 4 | ,bought_city 5 | ,ss_ticket_number 6 | ,amt,profit 7 | from 8 | (select ss_ticket_number 9 | ,ss_customer_sk 10 | ,ca_city bought_city 11 | ,sum(ss_coupon_amt) amt 12 | ,sum(ss_net_profit) profit 13 | from store_sales,date_dim,store,household_demographics,customer_address 14 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 15 | and store_sales.ss_store_sk = store.s_store_sk 16 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 17 | and store_sales.ss_addr_sk = customer_address.ca_address_sk 18 | and (household_demographics.hd_dep_count = 0 or 19 | household_demographics.hd_vehicle_count= 0) 20 | and date_dim.d_dow in (6,0) 21 | and date_dim.d_year in (1999,1999+1,1999+2) 22 | and store.s_city in ('distmember(cities, [CITYNUMBER.1], 1)','distmember(cities, [CITYNUMBER.2], 1)','distmember(cities, [CITYNUMBER.3], 1)','distmember(cities, [CITYNUMBER.4], 1)','distmember(cities, [CITYNUMBER.5], 1)') 23 | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr 24 | where 
ss_customer_sk = c_customer_sk 25 | and customer.c_current_addr_sk = current_addr.ca_address_sk 26 | and current_addr.ca_city <> bought_city 27 | order by c_last_name 28 | ,c_first_name 29 | ,ca_city 30 | ,bought_city 31 | ,ss_ticket_number 32 | LIMIT 100 33 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/48.sql: -------------------------------------------------------------------------------- 1 | select sum (ss_quantity) 2 | from store_sales, store, customer_demographics, customer_address, date_dim 3 | where s_store_sk = ss_store_sk 4 | and ss_sold_date_sk = d_date_sk and d_year = 1998 5 | and 6 | ( 7 | ( 8 | cd_demo_sk = ss_cdemo_sk 9 | and 10 | cd_marital_status = '[MS.1]' 11 | and 12 | cd_education_status = '[ES.1]' 13 | and 14 | ss_sales_price between 100.00 and 150.00 15 | ) 16 | or 17 | ( 18 | cd_demo_sk = ss_cdemo_sk 19 | and 20 | cd_marital_status = '[MS.1]' 21 | and 22 | cd_education_status = '[ES.1]' 23 | and 24 | ss_sales_price between 50.00 and 100.00 25 | ) 26 | or 27 | ( 28 | cd_demo_sk = ss_cdemo_sk 29 | and 30 | cd_marital_status = '[MS.1]' 31 | and 32 | cd_education_status = '[ES.1]' 33 | and 34 | ss_sales_price between 150.00 and 200.00 35 | ) 36 | ) 37 | and 38 | ( 39 | ( 40 | ss_addr_sk = ca_address_sk 41 | and 42 | ca_country = 'United States' 43 | and 44 | ca_state in ('[STATE.1]', '[STATE.2]', '[STATE.3]') 45 | and ss_net_profit between 0 and 2000 46 | ) 47 | or 48 | (ss_addr_sk = ca_address_sk 49 | and 50 | ca_country = 'United States' 51 | and 52 | ca_state in ('[STATE.4]', '[STATE.5]', '[STATE.6]') 53 | and ss_net_profit between 150 and 3000 54 | ) 55 | or 56 | (ss_addr_sk = ca_address_sk 57 | and 58 | ca_country = 'United States' 59 | and 60 | ca_state in ('[STATE.7]', '[STATE.8]', '[STATE.9]') 61 | and ss_net_profit between 50 and 25000 62 | ) 63 | ) 64 | 65 | -------------------------------------------------------------------------------- 
/isthmus/src/test/java/io/substrait/isthmus/expression/AggregateFunctionConverterTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus.expression; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertNotNull; 5 | 6 | import io.substrait.isthmus.AggregateFunctions; 7 | import io.substrait.isthmus.PlanTestBase; 8 | import io.substrait.isthmus.TypeConverter; 9 | import io.substrait.isthmus.expression.FunctionConverter.FunctionFinder; 10 | import java.util.List; 11 | import org.apache.calcite.rel.core.AggregateCall; 12 | import org.apache.calcite.sql.fun.SqlSumEmptyIsZeroAggFunction; 13 | import org.apache.calcite.sql.type.SqlTypeName; 14 | import org.junit.jupiter.api.Test; 15 | 16 | class AggregateFunctionConverterTest extends PlanTestBase { 17 | 18 | @Test 19 | void testFunctionFinderMatch() { 20 | AggregateFunctionConverter converter = 21 | new AggregateFunctionConverter( 22 | extensions.aggregateFunctions(), List.of(), typeFactory, TypeConverter.DEFAULT); 23 | 24 | FunctionFinder functionFinder = 25 | converter.getFunctionFinder( 26 | AggregateCall.create( 27 | new SqlSumEmptyIsZeroAggFunction(), 28 | true, 29 | List.of(1), 30 | 0, 31 | typeFactory.createSqlType(SqlTypeName.VARCHAR), 32 | null)); 33 | assertNotNull(functionFinder); 34 | assertEquals("sum0", functionFinder.getSubstraitName()); 35 | assertEquals(AggregateFunctions.SUM0, functionFinder.getOperator()); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/substrait-spark/src/main/java/io/substrait/examples/util/ParentStringify.java: -------------------------------------------------------------------------------- 1 | package io.substrait.examples.util; 2 | 3 | /** 4 | * Parent class of all stringifiers Created as it seemed there could be an optimization to share 5 | * formatting fns between the various 
/**
 * Parent class of all stringifiers. It tracks the current indent level and builds the line
 * prefixes, so that formatting functions can be shared between the various stringifiers.
 */
public class ParentStringify {

  protected String indentChar = " ";
  protected int indent;
  protected int indentSize = 3;

  /**
   * Build with a specific indent at the start. Note that one indent is {@code indentSize}
   * repetitions of {@code indentChar}, which defaults to 3 spaces.
   *
   * @param indent number of indents to start with
   */
  public ParentStringify(int indent) {
    this.indent = indent;
  }

  /**
   * Builds the prefix for a new entry and then moves one indent level deeper. A line break is
   * emitted first unless the current level is zero.
   */
  StringBuilder getIndent() {
    StringBuilder prefix = new StringBuilder(indent == 0 ? "" : "\n").append(getIndentString());
    indent++;
    return prefix;
  }

  /** Builds the padding for the current level followed by the entry marker. */
  StringBuilder getIndentString() {
    return new StringBuilder(padding()).append("+- ");
  }

  /**
   * Builds the prefix for a continuation line at the current level; the level is left unchanged.
   * A line break is emitted first unless the current level is zero.
   */
  StringBuilder getContinuationIndentString() {
    String lineBreak = indent != 0 ? "\n" : "";
    return new StringBuilder(lineBreak).append(padding()).append(" : ");
  }

  /**
   * Moves one indent level back out and renders the accumulated text.
   *
   * @param sb the text built so far
   * @return the contents of {@code sb}
   */
  protected String getOutdent(StringBuilder sb) {
    indent -= 1;
    return sb.toString();
  }

  /** Whitespace for the current level: {@code indent * indentSize} copies of the indent char. */
  private String padding() {
    return indentChar.repeat(indent * indentSize);
  }
}
11 | * 12 | * @see io.substrait.expression.Expression.ScalarFunctionInvocation 13 | * @see AggregateFunctionInvocation 14 | */ 15 | @Value.Immutable 16 | public interface EnumArg extends FunctionArg { 17 | EnumArg UNSPECIFIED_ENUM_ARG = builder().value(Optional.empty()).build(); 18 | 19 | Optional value(); 20 | 21 | @Override 22 | default R accept( 23 | SimpleExtension.Function fnDef, int argIdx, FuncArgVisitor fnArgVisitor, C context) 24 | throws E { 25 | return fnArgVisitor.visitEnumArg(fnDef, argIdx, this, context); 26 | } 27 | 28 | static EnumArg of(SimpleExtension.EnumArgument enumArg, String option) { 29 | if (!enumArg.options().contains(option)) { 30 | throw new IllegalArgumentException( 31 | String.format("EnumArg value %s not valid for options: %s", option, enumArg.options())); 32 | } 33 | return builder().value(Optional.of(option)).build(); 34 | } 35 | 36 | static EnumArg of(String value) { 37 | return builder().value(Optional.of(value)).build(); 38 | } 39 | 40 | static ImmutableEnumArg.Builder builder() { 41 | return ImmutableEnumArg.builder(); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/extendedexpression/ExtendedExpression.java: -------------------------------------------------------------------------------- 1 | package io.substrait.extendedexpression; 2 | 3 | import io.substrait.expression.Expression; 4 | import io.substrait.proto.AdvancedExtension; 5 | import io.substrait.relation.Aggregate; 6 | import io.substrait.type.NamedStruct; 7 | import java.util.List; 8 | import java.util.Optional; 9 | import org.immutables.value.Value; 10 | 11 | @Value.Immutable 12 | public abstract class ExtendedExpression { 13 | public abstract List getReferredExpressions(); 14 | 15 | public abstract NamedStruct getBaseSchema(); 16 | 17 | public abstract List getExpectedTypeUrls(); 18 | 19 | // creating simple extensions, such as extensionURIs and extensions, is performed on the fly 20 
| 21 | public abstract Optional getAdvancedExtension(); 22 | 23 | public static ImmutableExtendedExpression.Builder builder() { 24 | return ImmutableExtendedExpression.builder(); 25 | } 26 | 27 | public interface ExpressionReferenceBase { 28 | List getOutputNames(); 29 | } 30 | 31 | @Value.Immutable 32 | public abstract static class ExpressionReference implements ExpressionReferenceBase { 33 | public abstract Expression getExpression(); 34 | 35 | public static ImmutableExpressionReference.Builder builder() { 36 | return ImmutableExpressionReference.builder(); 37 | } 38 | } 39 | 40 | @Value.Immutable 41 | public abstract static class AggregateFunctionReference implements ExpressionReferenceBase { 42 | public abstract Aggregate.Measure getMeasure(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/69.sql: -------------------------------------------------------------------------------- 1 | select 2 | cd_gender, 3 | cd_marital_status, 4 | cd_education_status, 5 | count(*) cnt1, 6 | cd_purchase_estimate, 7 | count(*) cnt2, 8 | cd_credit_rating, 9 | count(*) cnt3 10 | from 11 | customer c,customer_address ca,customer_demographics 12 | where 13 | c.c_current_addr_sk = ca.ca_address_sk and 14 | ca_state in ('[STATE.1]','[STATE.2]','[STATE.3]') and 15 | cd_demo_sk = c.c_current_cdemo_sk and 16 | exists (select * 17 | from store_sales,date_dim 18 | where c.c_customer_sk = ss_customer_sk and 19 | ss_sold_date_sk = d_date_sk and 20 | d_year = 2003 and 21 | d_moy between 3 and 3+2) and 22 | (not exists (select * 23 | from web_sales,date_dim 24 | where c.c_customer_sk = ws_bill_customer_sk and 25 | ws_sold_date_sk = d_date_sk and 26 | d_year = 2003 and 27 | d_moy between 3 and 3+2) and 28 | not exists (select * 29 | from catalog_sales,date_dim 30 | where c.c_customer_sk = cs_ship_customer_sk and 31 | cs_sold_date_sk = d_date_sk and 32 | d_year = 2003 and 33 | d_moy between 3 and 3+2)) 34 
| group by cd_gender, 35 | cd_marital_status, 36 | cd_education_status, 37 | cd_purchase_estimate, 38 | cd_credit_rating 39 | order by cd_gender, 40 | cd_marital_status, 41 | cd_education_status, 42 | cd_purchase_estimate, 43 | cd_credit_rating 44 | LIMIT 100 45 | -------------------------------------------------------------------------------- /isthmus/src/test/java/io/substrait/isthmus/TpchQueryTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; 4 | 5 | import io.substrait.plan.Plan; 6 | import java.io.IOException; 7 | import java.util.stream.IntStream; 8 | import org.apache.calcite.sql.parser.SqlParseException; 9 | import org.junit.jupiter.params.ParameterizedTest; 10 | import org.junit.jupiter.params.provider.MethodSource; 11 | 12 | /** TPC-H test to convert SQL to Substrait and then convert those plans back to SQL. */ 13 | class TpchQueryTest extends PlanTestBase { 14 | static IntStream testCases() { 15 | return IntStream.rangeClosed(1, 22); 16 | } 17 | 18 | /** 19 | * Note that this test does not currently validate the correctness of the Substrait plan; just 20 | * that the SQL can be converted to Substrait and back to SQL without error. 
21 | */ 22 | @ParameterizedTest 23 | @MethodSource("testCases") 24 | void testQuery(int query) throws IOException { 25 | String inputSql = asString(String.format("tpch/queries/%02d.sql", query)); 26 | 27 | Plan plan = assertDoesNotThrow(() -> toSubstraitPlan(inputSql), "SQL to Substrait POJO"); 28 | 29 | assertDoesNotThrow(() -> toSql(plan), "Substrait POJO to SQL"); 30 | 31 | io.substrait.proto.Plan proto = 32 | assertDoesNotThrow(() -> toProto(plan), "Substrait POJO to Substrait PROTO"); 33 | 34 | assertDoesNotThrow(() -> toSql(proto), "Substrait PROTO to SQL"); 35 | } 36 | 37 | private Plan toSubstraitPlan(String sql) throws SqlParseException { 38 | return toSubstraitPlan(sql, TPCH_CATALOG); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /core/src/test/java/io/substrait/extension/ExtensionCollectorUriUrnTest.java: -------------------------------------------------------------------------------- 1 | package io.substrait.extension; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | 5 | import io.substrait.proto.Plan; 6 | import org.junit.jupiter.api.Test; 7 | 8 | class ExtensionCollectorUriUrnTest { 9 | 10 | @Test 11 | void testExtensionCollectorScalarFuncWithoutURI() { 12 | String uri = "test://uri"; 13 | BidiMap uriUrnMap = new BidiMap(); 14 | uriUrnMap.put(uri, "extension:test:basic"); 15 | 16 | SimpleExtension.ExtensionCollection extensionCollection = 17 | SimpleExtension.ExtensionCollection.builder().uriUrnMap(uriUrnMap).build(); 18 | 19 | ExtensionCollector collector = new ExtensionCollector(extensionCollection); 20 | 21 | SimpleExtension.ScalarFunctionVariant func = 22 | ImmutableSimpleExtension.ScalarFunctionVariant.builder() 23 | .urn("extension:test:basic") 24 | .name("test_func") 25 | .returnType(io.substrait.function.TypeExpressionCreator.REQUIRED.BOOLEAN) 26 | .build(); 27 | 28 | int functionRef = collector.getFunctionReference(func); 29 | assertEquals(1, functionRef); 30 | 31 | 
Plan.Builder planBuilder = Plan.newBuilder(); 32 | collector.addExtensionsToPlan(planBuilder); 33 | 34 | Plan plan = planBuilder.build(); 35 | assertEquals(1, plan.getExtensionUrnsCount()); 36 | assertEquals("extension:test:basic", plan.getExtensionUrns(0).getUrn()); 37 | 38 | assertEquals(1, plan.getExtensionUrisCount()); 39 | assertEquals("test://uri", plan.getExtensionUris(0).getUri()); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /examples/substrait-spark/src/main/resources/tests_subset_2023.csv: -------------------------------------------------------------------------------- 1 | test_id,vehicle_id,test_date,test_class,test_type,test_result,test_mileage,postcode_area 2 | 539514409,17113014,2023-01-09,4,NT,F,69934,PA 3 | 1122718877,986649781,2023-01-16,4,NT,F,57376,SG 4 | 1104881351,424684356,2023-03-06,4,NT,F,81853,SG 5 | 1487493049,1307056703,2023-03-07,4,NT,P,20763,SA 6 | 1107861883,130747047,2023-03-27,4,RT,P,125910,SA 7 | 472789285,777757523,2023-03-29,4,NT,P,68399,CO 8 | 1105082521,840180863,2023-04-15,4,NT,P,54240,NN 9 | 1172953135,917255260,2023-04-27,4,NT,P,60918,SM 10 | 127807783,888103385,2023-05-08,4,NT,P,112090,EH 11 | 1645970709,816803134,2023-06-03,4,NT,P,134858,RG 12 | 1355347761,919820431,2023-06-21,4,NT,P,37336,ST 13 | 1750209849,544950855,2023-06-23,4,NT,F,120034,NR 14 | 1376930435,439876988,2023-07-19,4,NT,P,109927,PO 15 | 582729949,1075446447,2023-07-19,4,NT,P,72986,SA 16 | 127953451,105663799,2023-07-31,4,NT,F,35824,ME 17 | 759291679,931759350,2023-08-07,4,NT,P,65353,DY 18 | 1629819891,335780567,2023-08-08,4,NT,PRS,103365,CF 19 | 1120026477,1153361746,2023-08-11,4,NT,P,286881,RM 20 | 1331300969,644861283,2023-08-15,4,NT,P,52173,LE 21 | 990694587,449899992,2023-08-16,4,NT,F,124891,SA 22 | 193460599,759696266,2023-08-29,4,NT,P,83554,LU 23 | 1337337679,1110416764,2023-10-09,4,NT,PRS,71093,SS 24 | 1885237527,137785384,2023-11-04,4,NT,P,88730,BH 25 | 
1082642803,1291985882,2023-11-15,4,NT,PRS,160717,BA 26 | 896066743,615735063,2023-11-15,4,RT,P,107710,NR 27 | 1022666841,474362449,2023-11-20,4,NT,P,56296,HP 28 | 1010400923,1203222226,2023-12-04,4,NT,F,89255,TW 29 | 866705687,605696575,2023-12-06,4,NT,P,14674,YO 30 | 621751843,72093448,2023-12-14,4,NT,F,230280,TR 31 | -------------------------------------------------------------------------------- /isthmus-cli/src/test/script/smoke.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu -o pipefail 4 | 5 | parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) 6 | cd "${parent_path}/../../.." 7 | CMD="${ISTHMUS:-build/native/nativeCompile/isthmus}" 8 | LINEITEM='CREATE TABLE LINEITEM (L_ORDERKEY BIGINT NOT NULL, L_PARTKEY BIGINT NOT NULL, L_SUPPKEY BIGINT NOT NULL, L_LINENUMBER INTEGER, L_QUANTITY DECIMAL, L_EXTENDEDPRICE DECIMAL, L_DISCOUNT DECIMAL, L_TAX DECIMAL, L_RETURNFLAG CHAR(1), L_LINESTATUS CHAR(1), L_SHIPDATE DATE, L_COMMITDATE DATE, L_RECEIPTDATE DATE, L_SHIPINSTRUCT CHAR(25), L_SHIPMODE CHAR(10), L_COMMENT VARCHAR(44))' 9 | echo "${LINEITEM}" 10 | #set -x 11 | 12 | # SQL Query - Simple 13 | "${CMD}" 'select * from lineitem' --create "${LINEITEM}" 14 | 15 | # SQL Query - With condition 16 | "${CMD}" 'select * from lineitem where l_orderkey > 10' --create "${LINEITEM}" 17 | 18 | # SQL Query - Aggregate 19 | "${CMD}" 'select l_orderkey, count(l_partkey) from lineitem group by l_orderkey' --create "${LINEITEM}" 20 | 21 | # SQL Expression - Literal expression 22 | "${CMD}" --expression '10' 23 | 24 | # SQL Expression - Reference expression 25 | "${CMD}" --expression 'l_suppkey' --create "${LINEITEM}" 26 | 27 | # SQL Expression - Filter expression 28 | "${CMD}" --expression 'l_orderkey > 10' --create "${LINEITEM}" 29 | 30 | # SQL Expression - Projection expression (column-1) 31 | "${CMD}" --expression 'l_orderkey + 9888486986' --create "${LINEITEM}" 32 | 33 | # SQL Expression - 03 Projection 
expression (column-1, column-2, column-3) 34 | "${CMD}" --expression 'l_orderkey + 9888486986' 'l_orderkey * 2' 'l_orderkey > 10' 'l_orderkey in (10, 20)' --create "${LINEITEM}" 35 | -------------------------------------------------------------------------------- /isthmus/src/test/resources/tpcds/queries/68.sql: -------------------------------------------------------------------------------- 1 | select c_last_name 2 | ,c_first_name 3 | ,ca_city 4 | ,bought_city 5 | ,ss_ticket_number 6 | ,extended_price 7 | ,extended_tax 8 | ,list_price 9 | from (select ss_ticket_number 10 | ,ss_customer_sk 11 | ,ca_city bought_city 12 | ,sum(ss_ext_sales_price) extended_price 13 | ,sum(ss_ext_list_price) list_price 14 | ,sum(ss_ext_tax) extended_tax 15 | from store_sales 16 | ,date_dim 17 | ,store 18 | ,household_demographics 19 | ,customer_address 20 | where store_sales.ss_sold_date_sk = date_dim.d_date_sk 21 | and store_sales.ss_store_sk = store.s_store_sk 22 | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 23 | and store_sales.ss_addr_sk = customer_address.ca_address_sk 24 | and date_dim.d_dom between 1 and 2 25 | and (household_demographics.hd_dep_count = 0 or 26 | household_demographics.hd_vehicle_count= 0) 27 | and date_dim.d_year in (1999,1999+1,1999+2) 28 | and store.s_city in ('distmember(cities, [CITYNUMBER.1], 1)','distmember(cities, [CITYNUMBER.2], 1)') 29 | group by ss_ticket_number 30 | ,ss_customer_sk 31 | ,ss_addr_sk,ca_city) dn 32 | ,customer 33 | ,customer_address current_addr 34 | where ss_customer_sk = c_customer_sk 35 | and customer.c_current_addr_sk = current_addr.ca_address_sk 36 | and current_addr.ca_city <> bought_city 37 | order by c_last_name 38 | ,ss_ticket_number 39 | LIMIT 100 40 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/Extension.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 
3 | import io.substrait.type.NamedStruct; 4 | import io.substrait.type.Type; 5 | import java.util.List; 6 | 7 | /** Contains tag interfaces for handling {@link com.google.protobuf.Any} types within Substrait. */ 8 | public class Extension { 9 | 10 | public interface LeafRelDetail extends ToProto { 11 | /** 12 | * @return the record layout for the associated {@link ExtensionLeaf} relation 13 | */ 14 | Type.Struct deriveRecordType(); 15 | } 16 | 17 | public interface SingleRelDetail extends ToProto { 18 | /** 19 | * @param input to the associated {@link ExtensionSingle} relation 20 | * @return the record layout for the associated {@link ExtensionSingle} relation 21 | */ 22 | Type.Struct deriveRecordType(Rel input); 23 | } 24 | 25 | public interface MultiRelDetail extends ToProto { 26 | /** 27 | * @param inputs to the associated {@link ExtensionMulti} relation 28 | * @return the record layout for the associated {@link ExtensionMulti} relation 29 | */ 30 | Type.Struct deriveRecordType(List inputs); 31 | } 32 | 33 | public interface ExtensionTableDetail extends ToProto { 34 | /** 35 | * @return the table schema for the associated {@link ExtensionTable} relation 36 | */ 37 | NamedStruct deriveSchema(); 38 | } 39 | 40 | public interface WriteExtensionObject extends ToProto {} 41 | 42 | public interface DdlExtensionObject extends ToProto {} 43 | } 44 | -------------------------------------------------------------------------------- /isthmus/src/test/java/io/substrait/isthmus/CalciteObjs.java: -------------------------------------------------------------------------------- 1 | package io.substrait.isthmus; 2 | 3 | import org.apache.calcite.rel.type.RelDataType; 4 | import org.apache.calcite.rel.type.RelDataTypeFactory; 5 | import org.apache.calcite.rex.RexBuilder; 6 | import org.apache.calcite.rex.RexNode; 7 | import org.apache.calcite.sql.type.SqlTypeName; 8 | 9 | /** Set of classes/methods that make it easier to work with Calcite. 
*/ 10 | public abstract class CalciteObjs { 11 | 12 | final RelDataTypeFactory type = SubstraitTypeSystem.TYPE_FACTORY; 13 | final RexBuilder rex = new RexBuilder(type); 14 | 15 | RelDataType t(SqlTypeName typeName, int... vals) { 16 | switch (vals.length) { 17 | case 0: 18 | return type.createSqlType(typeName); 19 | case 1: 20 | return type.createSqlType(typeName, vals[0]); 21 | case 2: 22 | return type.createSqlType(typeName, vals[0], vals[1]); 23 | default: 24 | throw new IllegalArgumentException(); 25 | } 26 | } 27 | 28 | RelDataType tN(SqlTypeName typeName, int... vals) { 29 | return type.createTypeWithNullability(t(typeName, vals), true); 30 | } 31 | 32 | public RexNode makeCalciteLiteral( 33 | boolean nullable, SqlTypeName typeName, Object value, int... vals) { 34 | return rex.makeLiteral(value, nullable ? tN(typeName, vals) : t(typeName, vals), true, false); 35 | } 36 | 37 | public RexNode c(Object value, SqlTypeName typeName, int... vals) { 38 | return makeCalciteLiteral(false, typeName, value, vals); 39 | } 40 | 41 | public RexNode cN(Object value, SqlTypeName typeName, int... vals) { 42 | return makeCalciteLiteral(true, typeName, value, vals); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /spark/src/main/scala/io/substrait/spark/expression/Enum.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package io.substrait.spark.expression 19 | 20 | import org.apache.spark.sql.catalyst.expressions.{LeafExpression, Unevaluable} 21 | import org.apache.spark.sql.types.{DataType, NullType} 22 | 23 | /** 24 | * For internal use only. This represents the equivalent of a Substrait enum parameter type for use 25 | * during conversion. It must not become part of a final Spark logical plan. 26 | * 27 | * @param value 28 | * The enum string value. 29 | */ 30 | case class Enum(value: String) extends LeafExpression with Unevaluable { 31 | override def nullable: Boolean = false 32 | 33 | override def dataType: DataType = NullType 34 | 35 | override def equals(that: Any): Boolean = that match { 36 | case Enum(other) => other == value 37 | case _ => false 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /core/src/main/java/io/substrait/relation/Rel.java: -------------------------------------------------------------------------------- 1 | package io.substrait.relation; 2 | 3 | import io.substrait.extension.AdvancedExtension; 4 | import io.substrait.hint.Hint; 5 | import io.substrait.type.Type; 6 | import io.substrait.type.TypeCreator; 7 | import io.substrait.util.VisitationContext; 8 | import java.util.List; 9 | import java.util.Optional; 10 | import java.util.stream.IntStream; 11 | import org.immutables.value.Value; 12 | 13 | public interface Rel { 14 | Optional getRemap(); 15 | 16 | /** 17 | * @return the {@link AdvancedExtension} associated with a {@link 
io.substrait.proto.RelCommon} 18 | * message, if present 19 | */ 20 | Optional getCommonExtension(); 21 | 22 | Type.Struct getRecordType(); 23 | 24 | List getInputs(); 25 | 26 | Optional getHint(); 27 | 28 | @Value.Immutable 29 | abstract class Remap { 30 | public abstract List indices(); 31 | 32 | public Type.Struct remap(Type.Struct initial) { 33 | List types = initial.fields(); 34 | return TypeCreator.of(initial.nullable()).struct(indices().stream().map(i -> types.get(i))); 35 | } 36 | 37 | public static Remap of(Iterable fields) { 38 | return ImmutableRemap.builder().addAllIndices(fields).build(); 39 | } 40 | 41 | public static Remap offset(int start, int length) { 42 | return of( 43 | IntStream.range(start, start + length) 44 | .mapToObj(i -> i) 45 | .collect(java.util.stream.Collectors.toList())); 46 | } 47 | } 48 | 49 | O accept( 50 | RelVisitor visitor, C context) throws E; 51 | } 52 | --------------------------------------------------------------------------------