├── .idea ├── icon.png └── vcs.xml ├── dev ├── auron-logo.png ├── license-header ├── docker-build │ ├── docker-compose.yml │ └── centos7 │ │ └── Dockerfile ├── utils.sh └── reformat ├── benchmark-results └── tpcds-benchmark-echarts.png ├── NOTICE ├── .github ├── pull_request_template.md ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── dependabot.yml ├── workflows │ ├── labeler.yml │ ├── style.yml │ ├── license.yml │ ├── stale.yml │ ├── celeborn.yml │ ├── uniffle.yml │ ├── flink.yml │ └── paimon.yml └── labeler.yml ├── DISCLAIMER ├── scalafix.conf ├── .gitattributes ├── native-engine ├── datafusion-ext-functions │ ├── src │ │ └── brickhouse │ │ │ └── mod.rs │ └── Cargo.toml ├── datafusion-ext-plans │ └── src │ │ ├── agg │ │ └── brickhouse │ │ │ └── mod.rs │ │ ├── joins │ │ ├── smj │ │ │ └── mod.rs │ │ └── bhj │ │ │ └── mod.rs │ │ ├── window │ │ └── processors │ │ │ └── mod.rs │ │ ├── common │ │ └── mod.rs │ │ ├── shuffle │ │ └── rss.rs │ │ └── lib.rs ├── datafusion-ext-commons │ ├── src │ │ ├── algorithm │ │ │ └── mod.rs │ │ ├── arrow │ │ │ ├── mod.rs │ │ │ ├── boolean.rs │ │ │ └── array_size.rs │ │ ├── hash │ │ │ └── mod.rs │ │ └── scalar_value.rs │ └── Cargo.toml ├── auron-jni-bridge │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── datafusion-ext-exprs │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── auron-memmgr │ ├── Cargo.toml │ └── src │ │ └── metrics.rs ├── auron-serde │ ├── Cargo.toml │ └── build.rs └── auron │ └── src │ ├── metrics.rs │ └── http │ └── memory_profiling.rs ├── auron-core └── src │ ├── test │ ├── resources │ │ ├── META-INF │ │ │ └── services │ │ │ │ └── org.apache.auron.jni.AuronAdaptorProvider │ │ └── log4j2.properties │ └── java │ │ └── org │ │ └── apache │ │ └── auron │ │ ├── jni │ │ ├── MockAuronAdaptorProvider.java │ │ ├── AuronAdaptorTest.java │ │ └── MockAuronAdaptor.java │ │ ├── functions │ │ └── MockAuronUDFWrapperContext.java │ │ └── configuration │ │ └── ConfigOptionTest.java │ └── main │ └── java │ └── org │ └── apache 
│ └── auron │ ├── jni │ └── AuronAdaptorProvider.java │ ├── arrowio │ └── AuronArrowFFIExporter.java │ ├── metric │ └── MetricNode.java │ └── functions │ └── AuronUDFWrapperContext.java ├── rust-toolchain.toml ├── spark-extension └── src │ ├── main │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── org.apache.auron.jni.AuronAdaptorProvider │ ├── java │ │ └── org │ │ │ └── apache │ │ │ ├── auron │ │ │ └── jni │ │ │ │ └── SparkAuronAdaptorProvider.java │ │ │ └── spark │ │ │ ├── memory │ │ │ └── auron │ │ │ │ └── OnHeapSpillManagerHelper.java │ │ │ └── sql │ │ │ └── execution │ │ │ └── auron │ │ │ └── plan │ │ │ └── NativeParquetSinkUtils.java │ └── scala │ │ └── org │ │ └── apache │ │ ├── spark │ │ └── sql │ │ │ ├── auron │ │ │ ├── AuronConvertProvider.scala │ │ │ ├── NativeSupports.scala │ │ │ └── util │ │ │ │ └── AuronLogUtils.scala │ │ │ ├── execution │ │ │ └── auron │ │ │ │ ├── shuffle │ │ │ │ ├── RssPartitionWriterBase.scala │ │ │ │ ├── AuronRssShuffleReaderBase.scala │ │ │ │ └── AuronShuffleDependency.scala │ │ │ │ ├── columnar │ │ │ │ ├── AuronColumnarMap.scala │ │ │ │ └── ColumnarHelper.scala │ │ │ │ └── plan │ │ │ │ └── Util.scala │ │ │ └── hive │ │ │ └── auron │ │ │ ├── HiveClientHelper.scala │ │ │ └── HiveUDFUtil.scala │ │ └── auron │ │ └── metric │ │ └── SparkMetricNode.scala │ └── test │ └── resources │ └── log4j2.properties ├── auron-spark-ui ├── src │ └── main │ │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── org.apache.spark.status.AppHistoryServerPlugin │ │ └── scala │ │ └── org │ │ └── apache │ │ ├── spark │ │ └── sql │ │ │ └── execution │ │ │ └── ui │ │ │ ├── AuronEventUtils.scala │ │ │ ├── AuronSQLTab.scala │ │ │ ├── AuronSQLAppStatusStore.scala │ │ │ ├── AuronSQLHistoryServerPlugin.scala │ │ │ ├── AuronSQLAppStatusListener.scala │ │ │ └── AuronAllExecutionsPage.scala │ │ └── auron │ │ └── spark │ │ └── ui │ │ └── AuronEvent.scala └── pom.xml ├── thirdparty ├── auron-paimon │ ├── src │ │ └── main │ │ │ └── resources │ │ │ 
└── META-INF │ │ │ └── services │ │ │ └── org.apache.spark.sql.auron.AuronConvertProvider │ └── pom.xml ├── auron-uniffle │ ├── src │ │ └── main │ │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── spark │ │ │ ├── shuffle │ │ │ └── uniffle │ │ │ │ └── RssShuffleHandleWrapper.scala │ │ │ └── sql │ │ │ └── execution │ │ │ └── auron │ │ │ └── shuffle │ │ │ └── uniffle │ │ │ └── AuronUniffleShuffleWriter.scala │ └── pom.xml ├── auron-iceberg │ └── src │ │ └── test │ │ └── scala │ │ └── org │ │ └── apache │ │ └── auron │ │ └── iceberg │ │ ├── AuronIcebergIntegrationSuite.scala │ │ └── BaseAuronIcebergSuite.scala ├── auron-celeborn-0.5 │ └── pom.xml └── auron-celeborn-0.6 │ └── pom.xml ├── .gitmodules ├── common └── src │ ├── main │ └── templates │ │ └── org │ │ └── apache │ │ └── auron │ │ └── common │ │ └── ProjectConstants.java │ └── test │ ├── scala │ └── org │ │ └── apache │ │ └── auron │ │ └── util │ │ └── AuronTestUtils.scala │ └── resources │ └── log4j2.properties ├── .rat-excludes ├── .asf.yaml ├── rustfmt.toml ├── spark-extension-shims-spark └── src │ ├── main │ ├── scala │ │ └── org │ │ │ └── apache │ │ │ └── spark │ │ │ └── sql │ │ │ └── execution │ │ │ └── auron │ │ │ ├── plan │ │ │ ├── NativeOrcScanExec.scala │ │ │ ├── NativeParquetScanExec.scala │ │ │ ├── ConvertToNativeExec.scala │ │ │ ├── NativeLocalLimitExec.scala │ │ │ ├── NativeCollectLimitExec.scala │ │ │ ├── NativeGlobalLimitExec.scala │ │ │ ├── NativeFilterExec.scala │ │ │ ├── NativeSortExec.scala │ │ │ ├── NativeUnionExec.scala │ │ │ ├── NativeTakeOrderedExec.scala │ │ │ ├── NativeExpandExec.scala │ │ │ ├── NativePartialTakeOrderedExec.scala │ │ │ ├── NativeGenerateExec.scala │ │ │ ├── NativeParquetSinkExec.scala │ │ │ ├── NativeWindowExec.scala │ │ │ └── NativeBroadcastExchangeExec.scala │ │ │ └── shuffle │ │ │ └── AuronShuffleWriter.scala │ └── java │ │ └── org │ │ └── apache │ │ └── spark │ │ └── sql │ │ └── auron │ │ ├── ForceApplyShuffledHashJoinInterceptor.java │ │ ├── 
ValidateSparkPlanApplyInterceptor.java │ │ └── ForceApplyShuffledHashJoinInjector.java │ └── test │ ├── scala │ └── org │ │ └── apache │ │ ├── auron │ │ ├── EmptyNativeRddSuite.scala │ │ ├── BaseAuronSQLSuite.scala │ │ ├── AuronSQLTestHelper.scala │ │ └── exec │ │ │ └── AuronExecSuite.scala │ │ └── spark │ │ └── sql │ │ └── execution │ │ └── BuildInfoInSparkUISuite.scala │ └── resources │ └── log4j2.properties ├── scalafmt.conf ├── .gitignore ├── auron-flink-extension └── src │ └── test │ └── java │ └── org │ └── apache │ └── auron │ └── flink │ └── table │ └── runtime │ └── AuronFlinkCalcITCase.java └── hadoop-shim ├── src └── main │ └── scala │ └── org │ └── apache │ └── auron │ └── hadoop │ └── fs │ └── FSDataOutputWrapper.scala └── pom.xml /.idea/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/auron/HEAD/.idea/icon.png -------------------------------------------------------------------------------- /dev/auron-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/auron/HEAD/dev/auron-logo.png -------------------------------------------------------------------------------- /benchmark-results/tpcds-benchmark-echarts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/auron/HEAD/benchmark-results/tpcds-benchmark-echarts.png -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | 2 | Apache Auron (Incubating) 3 | Copyright 2025 The Apache Software Foundation. 4 | 5 | This product includes software developed at 6 | The Apache Software Foundation (https://www.apache.org/). 
7 | 8 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 4 | # Which issue does this PR close? 5 | 6 | Closes # 7 | 8 | # Rationale for this change 9 | 10 | # What changes are included in this PR? 11 | 12 | # Are there any user-facing changes? 13 | 14 | # How was this patch tested? 15 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /DISCLAIMER: -------------------------------------------------------------------------------- 1 | Apache Auron (Incubating) is an effort undergoing incubation at the Apache 2 | Software Foundation (ASF), sponsored by the Apache Incubator PMC. 3 | 4 | Incubation is required of all newly accepted projects until a further review 5 | indicates that the infrastructure, communications, and decision making process 6 | have stabilized in a manner consistent with other successful ASF projects. 7 | 8 | While incubation status is not necessarily a reflection of the completeness 9 | or stability of the code, it does indicate that the project has yet to be 10 | fully endorsed by the ASF. 
11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 14 | 15 | **To Reproduce** 16 | 23 | 24 | **Expected behavior** 25 | 28 | 29 | **Screenshots** 30 | 33 | 34 | **Additional context** 35 | 38 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | 14 | 15 | **Describe the solution you'd like** 16 | 19 | 20 | **Describe alternatives you've considered** 21 | 24 | 25 | **Additional context** 26 | 29 | -------------------------------------------------------------------------------- /dev/license-header: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | -------------------------------------------------------------------------------- /scalafix.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | rules = [ 19 | RemoveUnused 20 | ] 21 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 
5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | *.bat text eol=crlf 19 | *.cmd text eol=crlf 20 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-functions/src/brickhouse/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | pub mod array_union; 17 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-plans/src/agg/brickhouse/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | pub mod collect; 17 | pub mod combine_unique; 18 | -------------------------------------------------------------------------------- /auron-core/src/test/resources/META-INF/services/org.apache.auron.jni.AuronAdaptorProvider: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | org.apache.auron.jni.MockAuronAdaptorProvider -------------------------------------------------------------------------------- /native-engine/datafusion-ext-commons/src/algorithm/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | pub mod loser_tree; 17 | pub mod rdx_queue; 18 | pub mod rdx_sort; 19 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-plans/src/joins/smj/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. 
See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | pub mod existence_join; 17 | pub mod full_join; 18 | pub mod semi_join; 19 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [toolchain] 19 | channel = "nightly-2025-05-09" 20 | components = ["rust-src", "cargo", "rustfmt", "clippy"] 21 | -------------------------------------------------------------------------------- /spark-extension/src/main/resources/META-INF/services/org.apache.auron.jni.AuronAdaptorProvider: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | org.apache.auron.jni.SparkAuronAdaptorProvider -------------------------------------------------------------------------------- /native-engine/datafusion-ext-plans/src/window/processors/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. 
You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | pub mod agg_processor; 17 | pub mod rank_processor; 18 | pub mod row_number_processor; 19 | -------------------------------------------------------------------------------- /auron-spark-ui/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | org.apache.spark.sql.execution.ui.AuronSQLHistoryServerPlugin 19 | -------------------------------------------------------------------------------- /thirdparty/auron-paimon/src/main/resources/META-INF/services/org.apache.spark.sql.auron.AuronConvertProvider: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | org.apache.spark.sql.hive.auron.paimon.PaimonConvertProvider 19 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-commons/src/arrow/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. 
You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | pub mod array_size; 17 | pub mod boolean; 18 | pub mod cast; 19 | pub mod coalesce; 20 | pub mod eq_comparator; 21 | pub mod selection; 22 | -------------------------------------------------------------------------------- /auron-core/src/main/java/org/apache/auron/jni/AuronAdaptorProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.auron.jni; 18 | 19 | public interface AuronAdaptorProvider { 20 | AuronAdaptor create(); 21 | } 22 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-plans/src/joins/bhj/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | pub mod full_join; 17 | pub mod semi_join; 18 | 19 | #[derive(std::marker::ConstParamTy, Clone, Copy, PartialEq, Eq)] 20 | pub enum ProbeSide { 21 | L, 22 | R, 23 | } 24 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | version: 2 19 | updates: 20 | - package-ecosystem: cargo 21 | directory: "/" 22 | schedule: 23 | interval: daily 24 | open-pull-requests-limit: 10 25 | target-branch: master 26 | labels: [auto-dependencies] 27 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-plans/src/common/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | pub mod cached_exprs_evaluator; 17 | pub mod column_pruning; 18 | pub mod execution_context; 19 | pub mod key_rows_output; 20 | pub mod offsetted; 21 | pub mod row_null_checker; 22 | pub mod stream_exec; 23 | pub mod timer_helper; 24 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | [submodule "dev/tpcds_1g"] 19 | path = dev/tpcds_1g 20 | url = https://github.com/auron-project/tpcds_1g 21 | [submodule ".github/actions/setup-rust-toolchain"] 22 | path = .github/actions/setup-rust-toolchain 23 | url = https://github.com/actions-rust-lang/setup-rust-toolchain 24 | -------------------------------------------------------------------------------- /common/src/main/templates/org/apache/auron/common/ProjectConstants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.auron.common; 19 | 20 | public final class ProjectConstants { 21 | public static final String PROJECT_VERSION = "${project.version}"; 22 | public static final String SHIM_NAME = "${shimName}"; 23 | } 24 | -------------------------------------------------------------------------------- /auron-core/src/test/java/org/apache/auron/jni/MockAuronAdaptorProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.auron.jni; 18 | 19 | public class MockAuronAdaptorProvider implements AuronAdaptorProvider { 20 | @Override 21 | public AuronAdaptor create() { 22 | return new MockAuronAdaptor(); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /spark-extension/src/main/java/org/apache/auron/jni/SparkAuronAdaptorProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.auron.jni; 18 | 19 | public class SparkAuronAdaptorProvider implements AuronAdaptorProvider { 20 | @Override 21 | public AuronAdaptor create() { 22 | return new SparkAuronAdaptor(); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /native-engine/auron-jni-bridge/Cargo.toml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. 
See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | [package] 19 | name = "auron-jni-bridge" 20 | version = "0.1.0" 21 | edition = "2024" 22 | resolver = "1" 23 | 24 | [dependencies] 25 | datafusion = { workspace = true } 26 | jni = { workspace = true } 27 | log = { workspace = true } 28 | once_cell = { workspace = true } 29 | paste = { workspace = true } 30 | -------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/auron/AuronConvertProvider.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.spark.sql.auron 18 | 19 | import org.apache.spark.sql.execution.SparkPlan 20 | 21 | trait AuronConvertProvider { 22 | def isEnabled: Boolean 23 | 24 | def isSupported(exec: SparkPlan): Boolean 25 | 26 | def convert(exec: SparkPlan): SparkPlan 27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | name: "Pull Request Labeler" 19 | on: pull_request_target 20 | 21 | permissions: 22 | contents: read 23 | pull-requests: write 24 | 25 | jobs: 26 | triage: 27 | runs-on: ubuntu-24.04 28 | steps: 29 | - uses: actions/labeler@v4 30 | with: 31 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 32 | sync-labels: true 33 | -------------------------------------------------------------------------------- /auron-spark-ui/src/main/scala/org/apache/spark/sql/execution/ui/AuronEventUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.ui 18 | 19 | import org.apache.spark.SparkContext 20 | 21 | import org.apache.auron.spark.ui.AuronEvent 22 | 23 | object AuronEventUtils { 24 | def post(sc: SparkContext, event: AuronEvent): Unit = { 25 | sc.listenerBus.post(event) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /auron-spark-ui/src/main/scala/org/apache/auron/spark/ui/AuronEvent.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.auron.spark.ui 18 | 19 | import scala.collection.mutable 20 | 21 | import org.apache.spark.scheduler.SparkListenerEvent 22 | 23 | sealed trait AuronEvent extends SparkListenerEvent {} 24 | 25 | case class AuronBuildInfoEvent(info: mutable.LinkedHashMap[String, String]) extends AuronEvent {} 26 | -------------------------------------------------------------------------------- /.rat-excludes: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | **/.*/** 19 | **/*.json 20 | **/*.prefs 21 | **/*.log 22 | **/*.md 23 | **/*.iml 24 | **/target/** 25 | **/out/** 26 | **/spark-warehouse/** 27 | **/metastore_db/** 28 | **/licenses/LICENSE* 29 | **/licenses-binary/LICENSE* 30 | **/dependency-reduced-pom.xml 31 | **/scalastyle-output.xml 32 | NOTICE* 33 | docs/** 34 | build/apache-maven-*/** 35 | src/main/resources/auron-build-info.properties 36 | -------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/execution/auron/shuffle/RssPartitionWriterBase.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.shuffle 18 | 19 | import java.nio.ByteBuffer 20 | 21 | trait RssPartitionWriterBase { 22 | def write(partitionId: Int, buffer: ByteBuffer): Unit 23 | def flush(): Unit 24 | def close(success: Boolean): Unit 25 | def getPartitionLengthMap: Array[Long] 26 | } 27 | -------------------------------------------------------------------------------- /spark-extension/src/main/java/org/apache/spark/memory/auron/OnHeapSpillManagerHelper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.memory.auron; 18 | 19 | import org.apache.spark.SparkEnv$; 20 | import org.apache.spark.memory.MemoryPool; 21 | 22 | public class OnHeapSpillManagerHelper { 23 | public static MemoryPool getOnHeapExecutionMemoryPool() { 24 | return SparkEnv$.MODULE$.get().memoryManager().onHeapExecutionMemoryPool(); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /.asf.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | github: 17 | description: "The Auron accelerator for distributed computing framework (e.g., Spark) leverages native vectorized execution to accelerate query processing" 18 | homepage: https://auron.apache.org/ 19 | labels: 20 | - rust-lang 21 | - big-data 22 | - spark 23 | - datafusion 24 | - arrow 25 | enabled_merge_buttons: 26 | squash: true 27 | squash_commit_message: PR_TITLE_AND_DESC 28 | merge: false 29 | rebase: false 30 | -------------------------------------------------------------------------------- /thirdparty/auron-uniffle/src/main/scala/org/apache/spark/shuffle/uniffle/RssShuffleHandleWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.shuffle.uniffle 18 | 19 | import org.apache.spark.shuffle.{BaseShuffleHandle, RssShuffleHandle} 20 | 21 | class RssShuffleHandleWrapper[K, V, C](val rssShuffleHandleInfo: RssShuffleHandle[K, V, C]) 22 | extends BaseShuffleHandle[K, V, C]( 23 | rssShuffleHandleInfo.getShuffleId, 24 | rssShuffleHandleInfo.getDependency) {} 25 | -------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/execution/auron/shuffle/AuronRssShuffleReaderBase.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.shuffle 18 | 19 | import org.apache.spark.TaskContext 20 | import org.apache.spark.shuffle.BaseShuffleHandle 21 | 22 | abstract class AuronRssShuffleReaderBase[K, C]( 23 | handle: BaseShuffleHandle[K, _, C], 24 | context: TaskContext) 25 | extends AuronBlockStoreShuffleReaderBase[K, C](handle, context) {} 26 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | edition = "2021" 19 | unstable_features = true 20 | 21 | max_width = 100 22 | wrap_comments = true 23 | format_code_in_doc_comments = true 24 | format_macro_bodies = true 25 | format_macro_matchers = true 26 | normalize_comments = true 27 | normalize_doc_attributes = true 28 | condense_wildcard_suffixes = true 29 | newline_style = "Unix" 30 | use_field_init_shorthand = true 31 | use_try_shorthand = true 32 | imports_granularity = "Crate" 33 | group_imports = "StdExternalCrate" 34 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeOrcScanExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.execution.FileSourceScanExec 20 | 21 | case class NativeOrcScanExec(basedFileScan: FileSourceScanExec) 22 | extends NativeOrcScanBase(basedFileScan) { 23 | 24 | override def simpleString(maxFields: Int): String = 25 | s"$nodeName (${basedFileScan.simpleString(maxFields)})" 26 | } 27 | -------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/hive/auron/HiveClientHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.hive.auron 18 | 19 | import org.apache.hadoop.hive.ql.metadata.Table 20 | import org.apache.spark.sql.catalyst.catalog.CatalogTable 21 | import org.apache.spark.sql.hive.client.HiveClientImpl 22 | 23 | object HiveClientHelper { 24 | def toHiveTable(table: CatalogTable, userName: Option[String] = None): Table = { 25 | HiveClientImpl.toHiveTable(table, userName) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeParquetScanExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.execution.FileSourceScanExec 20 | 21 | case class NativeParquetScanExec(basedFileScan: FileSourceScanExec) 22 | extends NativeParquetScanBase(basedFileScan) { 23 | 24 | override def simpleString(maxFields: Int): String = 25 | s"$nodeName (${basedFileScan.simpleString(maxFields)})" 26 | } 27 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-exprs/Cargo.toml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | [package] 19 | name = "datafusion-ext-exprs" 20 | version = "0.1.0" 21 | edition = "2024" 22 | resolver = "1" 23 | 24 | [dependencies] 25 | arrow = { workspace = true } 26 | auron-jni-bridge = { workspace = true } 27 | datafusion = { workspace = true } 28 | datafusion-ext-commons = { workspace = true } 29 | 30 | itertools = { workspace = true } 31 | jni = { workspace = true } 32 | log = { workspace = true } 33 | once_cell = { workspace = true } 34 | parking_lot = { workspace = true } 35 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/shuffle/AuronShuffleWriter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.shuffle 18 | 19 | import org.apache.spark.shuffle.ShuffleWriteMetricsReporter 20 | 21 | import org.apache.auron.sparkver 22 | 23 | class AuronShuffleWriter[K, V](metrics: ShuffleWriteMetricsReporter) 24 | extends AuronShuffleWriterBase[K, V](metrics) { 25 | 26 | @sparkver("3.2 / 3.3 / 3.4 / 3.5") 27 | override def getPartitionLengths(): Array[Long] = partitionLengths 28 | } 29 | -------------------------------------------------------------------------------- /auron-spark-ui/src/main/scala/org/apache/spark/sql/execution/ui/AuronSQLTab.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.ui 18 | 19 | import org.apache.spark.internal.Logging 20 | import org.apache.spark.ui.{SparkUI, SparkUITab} 21 | 22 | class AuronSQLTab(val sqlStore: AuronSQLAppStatusStore, sparkUI: SparkUI) 23 | extends SparkUITab(sparkUI, "auron") 24 | with Logging { 25 | 26 | override val name = "Auron" 27 | 28 | val parent = sparkUI 29 | attachPage(new AuronAllExecutionsPage(this)) 30 | parent.attachTab(this) 31 | } 32 | -------------------------------------------------------------------------------- /scalafmt.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | align = none 19 | align.openParenDefnSite = false 20 | align.openParenCallSite = false 21 | align.tokens = [] 22 | importSelectors = "singleLine" 23 | optIn = { 24 | configStyleArguments = false 25 | } 26 | danglingParentheses.preset = false 27 | docstrings.style = Asterisk 28 | maxColumn = 98 29 | runner.dialect = scala212 30 | version = 3.9.9 31 | rewrite.imports.groups = [ 32 | ["javax?\\..*"], 33 | ["scala\\..*"], 34 | ["(?!org\\.apache\\.auron\\.).*"], 35 | ["org\\.apache\\.auron\\..*"] 36 | ] 37 | rewrite.imports.sort = scalastyle 38 | rewrite.rules = [Imports, SortModifiers] 39 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-functions/Cargo.toml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | [package] 19 | name = "datafusion-ext-functions" 20 | version = "0.1.0" 21 | edition = "2024" 22 | resolver = "1" 23 | 24 | [dependencies] 25 | arrow = { workspace = true } 26 | auron-jni-bridge = { workspace = true } 27 | datafusion = { workspace = true } 28 | datafusion-ext-commons = { workspace = true } 29 | 30 | itertools = { workspace = true } 31 | log = { workspace = true } 32 | num = { workspace = true } 33 | paste = { workspace = true } 34 | serde_json = { workspace = true } 35 | sonic-rs = { workspace = true } 36 | chrono = "0.4.42" 37 | chrono-tz = "0.10.4" 38 | -------------------------------------------------------------------------------- /native-engine/auron-memmgr/Cargo.toml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | [package] 19 | name = "auron-memmgr" 20 | version = "0.1.0" 21 | edition = "2024" 22 | 23 | [dependencies] 24 | auron-jni-bridge = { workspace = true } 25 | datafusion = { workspace = true } 26 | datafusion-ext-commons = { workspace = true } 27 | 28 | async-trait = { workspace = true } 29 | bytesize = { workspace = true } 30 | jni = { workspace = true } 31 | log = { workspace = true } 32 | once_cell = { workspace = true } 33 | tempfile = { workspace = true } 34 | parking_lot = { workspace = true } 35 | 36 | [target.'cfg(target_os = "linux")'.dependencies] 37 | procfs = { workspace = true } 38 | -------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeSupports.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.auron 18 | 19 | import org.apache.spark.rdd.RDD 20 | import org.apache.spark.sql.catalyst.InternalRow 21 | import org.apache.spark.sql.execution.SparkPlan 22 | 23 | trait NativeSupports extends SparkPlan { 24 | protected def doExecuteNative(): NativeRDD 25 | 26 | override protected def doExecute(): RDD[InternalRow] = doExecuteNative() 27 | 28 | def executeNative(): NativeRDD = executeQuery { 29 | doExecuteNative() 30 | } 31 | 32 | def shuffleReadFull: Boolean = Shims.get.getRDDShuffleReadFull(this.doExecuteNative()) 33 | } 34 | -------------------------------------------------------------------------------- /auron-spark-ui/src/main/scala/org/apache/spark/sql/execution/ui/AuronSQLAppStatusStore.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.ui 18 | 19 | import com.fasterxml.jackson.annotation.JsonIgnore 20 | import org.apache.spark.util.kvstore.{KVIndex, KVStore} 21 | 22 | class AuronSQLAppStatusStore(store: KVStore) { 23 | 24 | def buildInfo(): AuronBuildInfoUIData = { 25 | val kClass = classOf[AuronBuildInfoUIData] 26 | store.read(kClass, kClass.getName) 27 | } 28 | } 29 | 30 | class AuronBuildInfoUIData(val info: Seq[(String, String)]) { 31 | @JsonIgnore 32 | @KVIndex 33 | def id: String = classOf[AuronBuildInfoUIData].getName() 34 | } 35 | -------------------------------------------------------------------------------- /thirdparty/auron-iceberg/src/test/scala/org/apache/auron/iceberg/AuronIcebergIntegrationSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.auron.iceberg 18 | 19 | import org.apache.spark.sql.Row 20 | 21 | class AuronIcebergIntegrationSuite 22 | extends org.apache.spark.sql.QueryTest 23 | with BaseAuronIcebergSuite { 24 | 25 | test("test iceberg integrate ") { 26 | withTable("local.db.t1") { 27 | sql( 28 | "create table local.db.t1 using iceberg PARTITIONED BY (part) as select 1 as c1, 2 as c2, 'test test' as part") 29 | val df = sql("select * from local.db.t1") 30 | checkAnswer(df, Seq(Row(1, 2, "test test"))) 31 | } 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /native-engine/auron-serde/Cargo.toml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | [package] 19 | name = "auron-serde" 20 | version = "0.1.0" 21 | edition = "2024" 22 | 23 | [features] 24 | default = ["prost/no-recursion-limit"] 25 | 26 | [dependencies] 27 | arrow = { workspace = true } 28 | datafusion = { workspace = true } 29 | datafusion-ext-exprs = { workspace = true } 30 | datafusion-ext-functions = { workspace = true } 31 | datafusion-ext-plans = { workspace = true } 32 | datafusion-spark = { workspace = true } 33 | 34 | base64 = { workspace = true } 35 | object_store = { workspace = true } 36 | parking_lot = { workspace = true } 37 | prost = { workspace = true } 38 | 39 | [build-dependencies] 40 | tonic-build = { workspace = true } 41 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/ConvertToNativeExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.execution.SparkPlan 20 | 21 | import org.apache.auron.sparkver 22 | 23 | case class ConvertToNativeExec(override val child: SparkPlan) extends ConvertToNativeBase(child) { 24 | 25 | @sparkver("3.2 / 3.3 / 3.4 / 3.5") 26 | override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = 27 | copy(child = newChild) 28 | 29 | @sparkver("3.0 / 3.1") 30 | override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = 31 | copy(child = newChildren.head) 32 | } 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | # Gradle 19 | .gradle/ 20 | build/ 21 | lib/ 22 | 23 | # Generated Visual Studio files 24 | *.vcxproj 25 | *.vcxproj.* 26 | *.sln 27 | *.iml 28 | .vscode 29 | 30 | # IDEA 31 | .idea/ 32 | 33 | # Misc 34 | *.orig 35 | .*.swp 36 | .*.swo 37 | *.cache 38 | .metals/ 39 | 40 | # macOS 41 | .DS_Store 42 | 43 | # docker volumes used for caching 44 | .docker 45 | docker_cache 46 | 47 | # Rust 48 | target/ 49 | 50 | # Protoc generated src 51 | spark-extension/src/generated-sources 52 | 53 | # exclude native engine build lib directory 54 | native-engine/_build 55 | 56 | /target-docker/ 57 | 58 | # exclude org.apache.auron.common.AuronBuildInfo 59 | common/src/main/resources/auron-build-info.properties 60 | -------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/auron/util/AuronLogUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.auron.util 18 | 19 | import org.apache.spark.internal.Logging 20 | import org.apache.spark.sql.auron.Shims 21 | import org.apache.spark.sql.execution.SparkPlan 22 | 23 | object AuronLogUtils extends Logging { 24 | 25 | def logDebugPlanConversion(plan: SparkPlan, fields: => Seq[(String, Any)] = Nil): Unit = { 26 | if (log.isDebugEnabled) { 27 | val header = s"Converting ${plan.nodeName}: ${Shims.get.simpleStringWithNodeId(plan)}" 28 | val body = fields.map { case (k, v) => s" $k: $v" }.mkString("\n") 29 | logDebug(s"$header\n$body".trim) 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /spark-extension/src/main/java/org/apache/spark/sql/execution/auron/plan/NativeParquetSinkUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan; 18 | 19 | // for jni_bridge usage 20 | @SuppressWarnings("unused") 21 | public class NativeParquetSinkUtils { 22 | public static String getTaskOutputPath() throws InterruptedException { 23 | return ParquetSinkTaskContext$.MODULE$.get().processingOutputFiles().take(); 24 | } 25 | 26 | public static void completeOutput(String path, long numRows, long numFiles) { 27 | OutputFileStat stat = new OutputFileStat(path, numRows, numFiles); 28 | ParquetSinkTaskContext$.MODULE$.get().processedOutputFiles().push(stat); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeLocalLimitExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.execution.SparkPlan 20 | 21 | import org.apache.auron.sparkver 22 | 23 | case class NativeLocalLimitExec(limit: Long, override val child: SparkPlan) 24 | extends NativeLocalLimitBase(limit, child) { 25 | 26 | @sparkver("3.2 / 3.3 / 3.4 / 3.5") 27 | override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = 28 | copy(child = newChild) 29 | 30 | @sparkver("3.0 / 3.1") 31 | override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = 32 | copy(child = newChildren.head) 33 | } 34 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeCollectLimitExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.execution.SparkPlan 20 | 21 | import org.apache.auron.sparkver 22 | 23 | case class NativeCollectLimitExec(limit: Int, override val child: SparkPlan) 24 | extends NativeCollectLimitBase(limit, child) { 25 | 26 | @sparkver("3.2 / 3.3 / 3.4 / 3.5") 27 | override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = 28 | copy(child = newChild) 29 | 30 | @sparkver("3.0 / 3.1") 31 | override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = 32 | copy(child = newChildren.head) 33 | } 34 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeGlobalLimitExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.execution.SparkPlan 20 | 21 | import org.apache.auron.sparkver 22 | 23 | case class NativeGlobalLimitExec(limit: Long, override val child: SparkPlan) 24 | extends NativeGlobalLimitBase(limit, child) { 25 | 26 | @sparkver("3.2 / 3.3 / 3.4 / 3.5") 27 | override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = 28 | copy(child = newChild) 29 | 30 | @sparkver("3.0 / 3.1") 31 | override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = 32 | copy(child = newChildren.head) 33 | } 34 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/test/scala/org/apache/auron/EmptyNativeRddSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.auron 18 | 19 | import org.apache.spark.sql.AuronQueryTest 20 | import org.apache.spark.sql.auron.EmptyNativeRDD 21 | 22 | class EmptyNativeRddSuite extends AuronQueryTest with BaseAuronSQLSuite { 23 | 24 | test("test empty native rdd") { 25 | val sc = spark.sparkContext 26 | val empty = new EmptyNativeRDD(sc) 27 | assert(empty.count === 0) 28 | assert(empty.collect().size === 0) 29 | 30 | val thrown = intercept[UnsupportedOperationException] { 31 | empty.reduce((row1, _) => { 32 | row1 33 | }) 34 | } 35 | assert(thrown.getMessage.contains("empty")) 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /common/src/test/scala/org/apache/auron/util/AuronTestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.auron.util 18 | 19 | import org.apache.spark.SPARK_VERSION 20 | 21 | object AuronTestUtils { 22 | 23 | lazy val SPARK_RUNTIME_VERSION: SemanticVersion = SemanticVersion(SPARK_VERSION) 24 | lazy val isSparkV30OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.0" 25 | lazy val isSparkV31OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.1" 26 | lazy val isSparkV32OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.2" 27 | lazy val isSparkV33OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.3" 28 | lazy val isSparkV34OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.4" 29 | lazy val isSparkV35OrGreater: Boolean = SPARK_RUNTIME_VERSION >= "3.5" 30 | } 31 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-commons/src/hash/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
/// Reads a little-endian `u32` from `data`, starting at byte `offset`.
///
/// The load is unaligned, so `offset` does not need any particular alignment.
/// On big-endian targets the loaded value is byte-swapped, so the result is
/// the same on every platform.
///
/// The caller must guarantee that `offset + 4 <= data.len()`. The contract is
/// verified only when debug assertions are enabled, so release builds perform
/// no bounds check.
fn read32(data: &[u8], offset: usize) -> u32 {
    debug_assert!(
        matches!(
            offset.checked_add(std::mem::size_of::<u32>()),
            Some(end) if end <= data.len()
        ),
        "read32 out of bounds: offset={}, len={}",
        offset,
        data.len()
    );
    // SAFETY: the caller guarantees that `offset..offset + 4` lies inside
    // `data` (checked above in debug builds), and `read_unaligned` places no
    // alignment requirement on the source pointer.
    let raw = unsafe { std::ptr::read_unaligned(data.as_ptr().add(offset).cast::<u32>()) };
    // The bytes are interpreted as little-endian: a no-op on little-endian
    // hosts, a byte swap on big-endian ones.
    u32::from_le(raw)
}

/// Reads a little-endian `u64` from `data`, starting at byte `offset`.
///
/// The load is unaligned, so `offset` does not need any particular alignment.
/// On big-endian targets the loaded value is byte-swapped, so the result is
/// the same on every platform.
///
/// The caller must guarantee that `offset + 8 <= data.len()`. The contract is
/// verified only when debug assertions are enabled, so release builds perform
/// no bounds check.
fn read64(data: &[u8], offset: usize) -> u64 {
    debug_assert!(
        matches!(
            offset.checked_add(std::mem::size_of::<u64>()),
            Some(end) if end <= data.len()
        ),
        "read64 out of bounds: offset={}, len={}",
        offset,
        data.len()
    );
    // SAFETY: the caller guarantees that `offset..offset + 8` lies inside
    // `data` (checked above in debug builds), and `read_unaligned` places no
    // alignment requirement on the source pointer.
    let raw = unsafe { std::ptr::read_unaligned(data.as_ptr().add(offset).cast::<u64>()) };
    // The bytes are interpreted as little-endian: a no-op on little-endian
    // hosts, a byte swap on big-endian ones.
    u64::from_le(raw)
}
16 | # 17 | 18 | rootLogger.level = info 19 | rootLogger.appenderRef.file.ref = File 20 | 21 | #File Appender 22 | appender.file.type = File 23 | appender.file.name = File 24 | appender.file.fileName = target/unit-tests.log 25 | appender.file.layout.type = PatternLayout 26 | appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex 27 | 28 | #Console Appender 29 | appender.console.type = Console 30 | appender.console.name = STDOUT 31 | appender.console.target = SYSTEM_OUT 32 | appender.console.layout.type = PatternLayout 33 | appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %maxLen{%m}{512}%n%ex{8}%n 34 | appender.console.filter.threshold.type = ThresholdFilter 35 | appender.console.filter.threshold.level = warn 36 | 37 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.auron 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.test.SharedSparkSession 21 | 22 | trait BaseAuronSQLSuite extends SharedSparkSession { 23 | 24 | override protected def sparkConf: SparkConf = { 25 | super.sparkConf 26 | .set("spark.sql.extensions", "org.apache.spark.sql.auron.AuronSparkSessionExtension") 27 | .set( 28 | "spark.shuffle.manager", 29 | "org.apache.spark.sql.execution.auron.shuffle.AuronShuffleManager") 30 | .set("spark.memory.offHeap.enabled", "false") 31 | .set("spark.auron.enable", "true") 32 | .set("spark.ui.enabled", "false") 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /auron-core/src/test/resources/log4j2.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | rootLogger.level = info 19 | rootLogger.appenderRef.file.ref = File 20 | 21 | #File Appender 22 | appender.file.type = File 23 | appender.file.name = File 24 | appender.file.fileName = target/unit-tests.log 25 | appender.file.layout.type = PatternLayout 26 | appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex 27 | 28 | #Console Appender 29 | appender.console.type = Console 30 | appender.console.name = STDOUT 31 | appender.console.target = SYSTEM_OUT 32 | appender.console.layout.type = PatternLayout 33 | appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %maxLen{%m}{512}%n%ex{8}%n 34 | appender.console.filter.threshold.type = ThresholdFilter 35 | appender.console.filter.threshold.level = warn 36 | 37 | -------------------------------------------------------------------------------- /.github/workflows/style.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | name: Style check 19 | # Triggered by pushes to, and pull requests against, master and branch-* branches. 20 | on: 21 | push: 22 | branches: 23 | - master 24 | - branch-* 25 | pull_request: 26 | branches: 27 | - master 28 | - branch-* 29 | # One concurrency group per ref; a newer run cancels the one still in progress. 30 | concurrency: 31 | group: style-${{ github.ref }} 32 | cancel-in-progress: true 33 | # Single job: check out the sources, set up JDK 8 with the Maven cache, then run ./dev/reformat --check. NOTE(review): strategy.fail-fast is set although no matrix is defined - confirm it is still needed. 34 | jobs: 35 | style: 36 | name: Style 37 | runs-on: ubuntu-24.04 38 | strategy: 39 | fail-fast: false 40 | steps: 41 | - uses: actions/checkout@v4 42 | - name: Setup JDK 8 43 | uses: actions/setup-java@v4 44 | with: 45 | distribution: 'adopt-hotspot' 46 | java-version: 8 47 | cache: 'maven' 48 | check-latest: false 49 | - run: | 50 | ./dev/reformat --check 51 | -------------------------------------------------------------------------------- /spark-extension/src/test/resources/log4j2.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | rootLogger.level = info 19 | rootLogger.appenderRef.file.ref = File 20 | 21 | # File appender: the only appender referenced by rootLogger above; writes to target/unit-tests.log. 22 | appender.file.type = File 23 | appender.file.name = File 24 | appender.file.fileName = target/unit-tests.log 25 | appender.file.layout.type = PatternLayout 26 | appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex 27 | 28 | # Console appender: targets SYSTEM_OUT with a ThresholdFilter at level warn; the pattern caps each message at 512 characters. NOTE(review): no rootLogger.appenderRef in this file points at STDOUT, so this appender looks unused unless it is attached elsewhere - confirm. 29 | appender.console.type = Console 30 | appender.console.name = STDOUT 31 | appender.console.target = SYSTEM_OUT 32 | appender.console.layout.type = PatternLayout 33 | appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %maxLen{%m}{512}%n%ex{8}%n 34 | appender.console.filter.threshold.type = ThresholdFilter 35 | appender.console.filter.threshold.level = warn 36 | 37 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/test/resources/log4j2.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | rootLogger.level = info 19 | rootLogger.appenderRef.file.ref = File 20 | 21 | # File appender: the only appender referenced by rootLogger above; writes to target/unit-tests.log. 22 | appender.file.type = File 23 | appender.file.name = File 24 | appender.file.fileName = target/unit-tests.log 25 | appender.file.layout.type = PatternLayout 26 | appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex 27 | 28 | # Console appender: targets SYSTEM_OUT with a ThresholdFilter at level warn; the pattern caps each message at 512 characters. NOTE(review): no rootLogger.appenderRef in this file points at STDOUT, so this appender looks unused unless it is attached elsewhere - confirm. 29 | appender.console.type = Console 30 | appender.console.name = STDOUT 31 | appender.console.target = SYSTEM_OUT 32 | appender.console.layout.type = PatternLayout 33 | appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %maxLen{%m}{512}%n%ex{8}%n 34 | appender.console.filter.threshold.type = ThresholdFilter 35 | appender.console.filter.threshold.level = warn 36 | 37 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeFilterExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.catalyst.expressions.Expression 20 | import org.apache.spark.sql.execution.SparkPlan 21 | 22 | import org.apache.auron.sparkver 23 | 24 | /** Concrete case class over NativeFilterBase that only adds the child-replacement hooks. NOTE(review): @sparkver presumably keeps each override only when building against one of the listed Spark versions - confirm against the annotation's definition. */ case class NativeFilterExec(condition: Expression, override val child: SparkPlan) 25 | extends NativeFilterBase(condition, child) { 26 | 27 | /** Returns a copy of this node with `newChild` as its child. */ @sparkver("3.2 / 3.3 / 3.4 / 3.5") 28 | override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = 29 | copy(child = newChild) 30 | 31 | /** Returns a copy of this node whose child is the first element of `newChildren`. */ @sparkver("3.0 / 3.1") 32 | override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = 33 | copy(child = newChildren.head) 34 | } 35 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/java/org/apache/spark/sql/auron/ForceApplyShuffledHashJoinInterceptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.auron; 18 | 19 | import net.bytebuddy.implementation.bind.annotation.Argument; 20 | import net.bytebuddy.implementation.bind.annotation.RuntimeType; 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | 24 | /** Byte Buddy interceptor target; per its log message it stands in for JoinSelectionHelper.forceApplyShuffledHashJoin(). */ public class ForceApplyShuffledHashJoinInterceptor { 25 | private static final Logger logger = LoggerFactory.getLogger(ForceApplyShuffledHashJoinInterceptor.class); 26 | 27 | /** Logs the interception at debug level and returns the boolean value of AuronConf.FORCE_SHUFFLED_HASH_JOIN; the bound {@code conf} argument is not used. */ @RuntimeType 28 | public static Object intercept(@Argument(0) Object conf) { 29 | logger.debug("calling JoinSelectionHelper.forceApplyShuffledHashJoin() intercepted by auron"); 30 | return AuronConf.FORCE_SHUFFLED_HASH_JOIN.booleanConf(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeSortExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.catalyst.expressions.SortOrder 20 | import org.apache.spark.sql.execution.SparkPlan 21 | 22 | import org.apache.auron.sparkver 23 | 24 | /** Concrete case class over NativeSortBase that only adds the child-replacement hooks. NOTE(review): @sparkver presumably keeps each override only when building against one of the listed Spark versions - confirm against the annotation's definition. */ case class NativeSortExec( 25 | sortOrder: Seq[SortOrder], 26 | global: Boolean, 27 | override val child: SparkPlan) 28 | extends NativeSortBase(sortOrder, global, child) { 29 | 30 | /** Returns a copy of this node with `newChild` as its child. */ @sparkver("3.2 / 3.3 / 3.4 / 3.5") 31 | override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = 32 | copy(child = newChild) 33 | 34 | /** Returns a copy of this node whose child is the first element of `newChildren`. */ @sparkver("3.0 / 3.1") 35 | override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = 36 | copy(child = newChildren.head) 37 | } 38 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeUnionExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.catalyst.expressions.Attribute 20 | import org.apache.spark.sql.execution.SparkPlan 21 | 22 | import org.apache.auron.sparkver 23 | 24 | /** Concrete case class over NativeUnionBase that only adds the children-replacement hooks. NOTE(review): @sparkver presumably keeps each override only when building against one of the listed Spark versions - confirm against the annotation's definition. */ case class NativeUnionExec( 25 | override val children: Seq[SparkPlan], 26 | override val output: Seq[Attribute]) 27 | extends NativeUnionBase(children, output) { 28 | 29 | /** Returns a copy of this node with `newChildren` as its children. */ @sparkver("3.2 / 3.3 / 3.4 / 3.5") 30 | override protected def withNewChildrenInternal(newChildren: IndexedSeq[SparkPlan]): SparkPlan = 31 | copy(children = newChildren) 32 | 33 | /** Returns a copy of this node with `newChildren` as its children. */ @sparkver("3.0 / 3.1") 34 | override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = 35 | copy(children = newChildren) 36 | } 37 | -------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/execution/auron/columnar/AuronColumnarMap.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.columnar 18 | 19 | import org.apache.spark.sql.catalyst.util.ArrayBasedMapData 20 | import org.apache.spark.sql.catalyst.util.ArrayData 21 | import org.apache.spark.sql.catalyst.util.MapData 22 | 23 | /** MapData backed by a key column vector and a value column vector; both are exposed through AuronColumnarArray built with the same `offset` and `length` (presumably the start index and entry count within the vectors - confirm against AuronColumnarArray). */ class AuronColumnarMap( 24 | keys: AuronColumnVector, 25 | values: AuronColumnVector, 26 | offset: Int, 27 | private val length: Int) 28 | extends MapData { 29 | 30 | /** Number of entries, i.e. the `length` given at construction. */ override def numElements: Int = length 31 | 32 | /** Builds a new AuronColumnarArray over the key vector on every call. */ override def keyArray: ArrayData = new AuronColumnarArray(keys, offset, length) 33 | 34 | /** Builds a new AuronColumnarArray over the value vector on every call. */ override def valueArray: ArrayData = new AuronColumnarArray(values, offset, length) 35 | 36 | /** Returns an ArrayBasedMapData built from copies of the key and value arrays. */ override def copy = new ArrayBasedMapData(keyArray.copy, valueArray.copy) 37 | } 38 | -------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/execution/auron/columnar/ColumnarHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.columnar 18 | 19 | import scala.collection.JavaConverters._ 20 | 21 | import org.apache.arrow.vector.VectorSchemaRoot 22 | 23 | /** Helpers for reading an Arrow VectorSchemaRoot row by row. */ object ColumnarHelper { 24 | /** Iterates over the rows of `root`. A single AuronColumnarBatchRow is created up front and only its rowId is updated on each step, so every element returned is the same object; callers must not hold on to a row across iterations. The row count is read once, when this method is called. */ def rootRowsIter(root: VectorSchemaRoot): Iterator[AuronColumnarBatchRow] = { 25 | val row = rootRowReusable(root) 26 | val numRows = root.getRowCount 27 | Range(0, numRows).iterator.map { rowId => 28 | row.rowId = rowId 29 | row 30 | } 31 | } 32 | 33 | /** Wraps every field vector of `root` in an AuronArrowColumnVector and returns one AuronColumnarBatchRow over them. */ def rootRowReusable(root: VectorSchemaRoot): AuronColumnarBatchRow = { 34 | val vectors = root.getFieldVectors.asScala.toArray 35 | new AuronColumnarBatchRow( 36 | vectors.map(new AuronArrowColumnVector(_).asInstanceOf[AuronColumnVector])) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-exprs/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | use std::any::Any; 17 | 18 | use datafusion::physical_expr::{PhysicalExpr, PhysicalExprRef}; 19 | 20 | pub mod bloom_filter_might_contain; 21 | pub mod cast; 22 | pub mod get_indexed_field; 23 | pub mod get_map_value; 24 | pub mod named_struct; 25 | pub mod row_num; 26 | pub mod spark_scalar_subquery_wrapper; 27 | pub mod spark_udf_wrapper; 28 | pub mod string_contains; 29 | pub mod string_ends_with; 30 | pub mod string_starts_with; 31 | 32 | /** Unwraps `any` when it holds a `PhysicalExprRef` or a `Box<dyn PhysicalExpr>`, returning the wrapped expression's `as_any()`; any other value is returned unchanged. The `unwrap()` calls cannot fail because each one follows an `is` check for the same type. */ fn down_cast_any_ref(any: &dyn Any) -> &dyn Any { 33 | if any.is::<PhysicalExprRef>() { 34 | any.downcast_ref::<PhysicalExprRef>().unwrap().as_any() 35 | } else if any.is::<Box<dyn PhysicalExpr>>() { 36 | any.downcast_ref::<Box<dyn PhysicalExpr>>() 37 | .unwrap() 38 | .as_any() 39 | } else { 40 | any 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeTakeOrderedExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.catalyst.expressions.SortOrder 20 | import org.apache.spark.sql.execution.SparkPlan 21 | 22 | import org.apache.auron.sparkver 23 | 24 | /** Concrete case class over NativeTakeOrderedBase that only adds the child-replacement hooks. NOTE(review): @sparkver presumably keeps each override only when building against one of the listed Spark versions - confirm against the annotation's definition. */ case class NativeTakeOrderedExec( 25 | limit: Long, 26 | sortOrder: Seq[SortOrder], 27 | override val child: SparkPlan) 28 | extends NativeTakeOrderedBase(limit, sortOrder, child) { 29 | 30 | /** Returns a copy of this node with `newChild` as its child. */ @sparkver("3.2 / 3.3 / 3.4 / 3.5") 31 | override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = 32 | copy(child = newChild) 33 | 34 | /** Returns a copy of this node whose child is the first element of `newChildren`. */ @sparkver("3.0 / 3.1") 35 | override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = 36 | copy(child = newChildren.head) 37 | } 38 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronSQLTestHelper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.auron 18 | 19 | import org.apache.spark.sql.internal.SQLConf 20 | 21 | /** Test mixin for running a block of code under temporary SQLConf overrides. */ trait AuronSQLTestHelper { 22 | /** Records the current value (if any) of every key in `pairs`, sets the given values on SQLConf.get, runs `f`, and then - in a finally block, so also when `f` throws - restores each key to its previous value or unsets it when it had none. */ def withEnvConf(pairs: (String, String)*)(f: => Unit): Unit = { 23 | val conf = SQLConf.get 24 | val (keys, values) = pairs.unzip 25 | val currentValues = keys.map { key => 26 | if (conf.contains(key)) { 27 | Some(conf.getConfString(key)) 28 | } else { 29 | None 30 | } 31 | } 32 | (keys, values).zipped.foreach { (k, v) => 33 | conf.setConfString(k, v) 34 | } 35 | try f 36 | finally { 37 | keys.zip(currentValues).foreach { 38 | case (key, Some(value)) => conf.setConfString(key, value) 39 | case (key, None) => conf.unsetConf(key) 40 | } 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/java/org/apache/spark/sql/auron/ValidateSparkPlanApplyInterceptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.auron; 18 | 19 | import net.bytebuddy.implementation.bind.annotation.Argument; 20 | import net.bytebuddy.implementation.bind.annotation.RuntimeType; 21 | import org.apache.spark.sql.execution.SparkPlan; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | /** Byte Buddy interceptor target; per its log message it stands in for ValidateSparkPlan.apply(). */ public class ValidateSparkPlanApplyInterceptor { 26 | private static final Logger logger = LoggerFactory.getLogger(ValidateSparkPlanApplyInterceptor.class); 27 | 28 | /** Logs the interception at debug level, passes {@code plan} (cast to SparkPlan) to InterceptedValidateSparkPlan.validate, and returns the same plan object. */ @RuntimeType 29 | public static Object intercept(@Argument(0) Object plan) { 30 | logger.debug("calling ValidateSparkPlan.apply() intercepted by auron"); 31 | InterceptedValidateSparkPlan$.MODULE$.validate((SparkPlan) plan); 32 | return plan; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /thirdparty/auron-iceberg/src/test/scala/org/apache/auron/iceberg/BaseAuronIcebergSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.auron.iceberg 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.sql.test.SharedSparkSession 21 | 22 | /** Shared base for the Iceberg test suites: layers Iceberg catalog settings on top of the SparkConf supplied by SharedSparkSession. */ trait BaseAuronIcebergSuite extends SharedSparkSession { 23 | 24 | /** Returns the parent conf with the Iceberg session extensions registered, spark_catalog set to Iceberg's SparkSessionCatalog, a hadoop-type catalog named `local` with warehouse `iceberg_warehouse`, and the Spark UI switched off. NOTE(review): no Auron extension or spark.auron.* setting is applied here despite the trait name - confirm that is intended. */ override protected def sparkConf: SparkConf = { 25 | super.sparkConf 26 | .set( 27 | "spark.sql.extensions", 28 | "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") 29 | .set("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog") 30 | .set("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") 31 | .set("spark.sql.catalog.local.type", "hadoop") 32 | .set("spark.sql.catalog.local.warehouse", "iceberg_warehouse") 33 | .set("spark.ui.enabled", "false") 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/test/scala/org/apache/auron/exec/AuronExecSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.auron.exec 18 | 19 | import org.apache.spark.sql.AuronQueryTest 20 | import org.apache.spark.sql.execution.auron.plan.NativeCollectLimitExec 21 | 22 | import org.apache.auron.BaseAuronSQLSuite 23 | 24 | /** Execution tests for Auron operators, run with the configuration from BaseAuronSQLSuite. */ class AuronExecSuite extends AuronQueryTest with BaseAuronSQLSuite { 25 | 26 | /* Inserts 12 rows into a parquet table, then for limits below, equal to and above the row count runs the query through checkSparkAnswerAndOperator and asserts that the executed plan contains a NativeCollectLimitExec. */ test("Collect Limit") { 27 | withTable("t1") { 28 | sql("create table t1(id INT) using parquet") 29 | sql("insert into t1 values(1),(2),(3),(3),(3),(4),(5),(6),(7),(8),(9),(10)") 30 | Seq(1, 3, 8, 12, 20).foreach { limit => 31 | val df = checkSparkAnswerAndOperator(s"SELECT id FROM t1 limit $limit") 32 | assert(collectFirst(df.queryExecution.executedPlan) { case e: NativeCollectLimitExec => 33 | e 34 | }.isDefined) 35 | } 36 | } 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /auron-spark-ui/src/main/scala/org/apache/spark/sql/execution/ui/AuronSQLHistoryServerPlugin.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.ui 18 | 19 | import org.apache.spark.SparkConf 20 | import org.apache.spark.scheduler.SparkListener 21 | import org.apache.spark.status.{AppHistoryServerPlugin, ElementTrackingStore} 22 | import org.apache.spark.ui.SparkUI 23 | 24 | /** AppHistoryServerPlugin that contributes the Auron SQL status listener and the Auron SQL UI tab. */ class AuronSQLHistoryServerPlugin extends AppHistoryServerPlugin { 25 | 26 | /** Returns a single AuronSQLAppStatusListener built from `conf` and `store`. */ override def createListeners( 27 | conf: SparkConf, 28 | store: ElementTrackingStore): Seq[SparkListener] = { 29 | Seq(new AuronSQLAppStatusListener(conf, store)) 30 | } 31 | 32 | /** Creates an AuronSQLTab for `ui` only when the status store reports non-null build info; otherwise the UI is left untouched. */ override def setupUI(ui: SparkUI): Unit = { 33 | val sqlStatusStore = new AuronSQLAppStatusStore(ui.store.store) 34 | if (sqlStatusStore.buildInfo() != null) { 35 | new AuronSQLTab(sqlStatusStore, ui) 36 | } 37 | } 38 | 39 | /** Display order reported for this plugin: always 0. */ override def displayOrder: Int = 0 40 | } 41 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeExpandExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.spark.sql.execution.auron.plan 18 | 19 | import org.apache.spark.sql.catalyst.expressions.Attribute 20 | import org.apache.spark.sql.catalyst.expressions.Expression 21 | import org.apache.spark.sql.execution.SparkPlan 22 | 23 | import org.apache.auron.sparkver 24 | 25 | /** Concrete case class over NativeExpandBase that only adds the child-replacement hooks. NOTE(review): @sparkver presumably keeps each override only when building against one of the listed Spark versions - confirm against the annotation's definition. */ case class NativeExpandExec( 26 | projections: Seq[Seq[Expression]], 27 | override val output: Seq[Attribute], 28 | override val child: SparkPlan) 29 | extends NativeExpandBase(projections, output, child) { 30 | 31 | /** Returns a copy of this node with `newChild` as its child. */ @sparkver("3.2 / 3.3 / 3.4 / 3.5") 32 | override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = 33 | copy(child = newChild) 34 | 35 | /** Returns a copy of this node whose child is the first element of `newChildren`. */ @sparkver("3.0 / 3.1") 36 | override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = 37 | copy(child = newChildren.head) 38 | } 39 | -------------------------------------------------------------------------------- /auron-core/src/test/java/org/apache/auron/functions/MockAuronUDFWrapperContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.apache.auron.functions; 18 | 19 | import java.nio.ByteBuffer; 20 | 21 | /** 22 | * Mock AuronUDFWrapperContext for tests: the constructor only drains the serialized UDF bytes and eval is a no-op. 23 | */ 24 | public class MockAuronUDFWrapperContext implements AuronUDFWrapperContext { 25 | 26 | /** Reads all remaining bytes of {@code udfSerialized} into a local array, which advances the buffer's position; nothing is deserialized or stored. */ public MockAuronUDFWrapperContext(ByteBuffer udfSerialized) { 27 | // Mock implementation: a real context would obtain the information needed to initialize the UDF 28 | // by deserializing these bytes. Here they are only copied out of the buffer. 29 | byte[] bytes = new byte[udfSerialized.remaining()]; 30 | udfSerialized.get(bytes); 31 | // A real implementation would deserialize the UDF information here, 32 | // then resolve the UDF class name and initialize the UDF. 33 | } 34 | 35 | @Override 36 | public void eval(long inputPtr, long outputPtr) { 37 | // Mock implementation: intentionally empty. A real context would use inputPtr and outputPtr to process the data. 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /auron-core/src/main/java/org/apache/auron/arrowio/AuronArrowFFIExporter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
package org.apache.auron.arrowio;

/**
 * Abstract class for exporting data to Arrow format via FFI (Foreign Function Interface).
 * This class serves as a bridge between SQL's execution engine and Arrow data structures,
 * allowing efficient data transfer between JVM and native code.
 *
 * <p>The class implements {@link AutoCloseable} but does not define {@code close()} itself,
 * so every concrete subclass has to provide it.
 */
public abstract class AuronArrowFFIExporter implements AutoCloseable {

    /**
     * Exports the next batch of data to Arrow format.
     * This method is called by the native code to fetch the next batch of data
     * from the JVM side and convert it to Arrow format.
     *
     * @param contextPtr Native pointer to the Arrow FFI context
     * @return true if there is more data to export, false if all data has been exported
     */
    public abstract boolean exportNextBatch(long contextPtr);
}
16 | # 17 | 18 | [package] 19 | name = "datafusion-ext-commons" 20 | version = "0.1.0" 21 | edition = "2024" 22 | resolver = "1" 23 | 24 | [features] 25 | default = ["tokio/rt-multi-thread"] 26 | 27 | [dependencies] 28 | arrow = { workspace = true } 29 | arrow-schema = { workspace = true } 30 | auron-jni-bridge = { workspace = true } 31 | datafusion = { workspace = true } 32 | bigdecimal = { workspace = true } 33 | byteorder = { workspace = true } 34 | chrono = { workspace = true } 35 | itertools = { workspace = true } 36 | jni = { workspace = true } 37 | log = { workspace = true } 38 | lz4_flex = { workspace = true } 39 | num = { workspace = true } 40 | once_cell = { workspace = true } 41 | paste = { workspace = true } 42 | smallvec = { workspace = true } 43 | tokio = { workspace = true } 44 | transpose = { workspace = true } 45 | unchecked-index = { workspace = true } 46 | zstd = { workspace = true } 47 | 48 | [dev-dependencies] 49 | rand = { workspace = true } 50 | -------------------------------------------------------------------------------- /native-engine/auron-jni-bridge/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | use datafusion::common::{DataFusionError, Result}; 17 | 18 | pub mod conf; 19 | pub mod jni_bridge; 20 | 21 | pub fn is_jni_bridge_inited() -> bool { 22 | jni_bridge::JavaClasses::inited() 23 | } 24 | 25 | pub fn ensure_jni_bridge_inited() -> Result<()> { 26 | if is_jni_bridge_inited() { 27 | Ok(()) 28 | } else { 29 | Err(DataFusionError::Execution( 30 | "JNIEnv not initialized".to_string(), 31 | )) 32 | } 33 | } 34 | 35 | pub fn is_task_running() -> bool { 36 | fn is_task_running_impl() -> Result { 37 | if !jni_call_static!(JniBridge.isTaskRunning() -> bool).unwrap() { 38 | jni_exception_clear!()?; 39 | return Ok(false); 40 | } 41 | Ok(true) 42 | } 43 | if !is_jni_bridge_inited() { 44 | // only for testing 45 | return true; 46 | } 47 | is_task_running_impl().expect("calling JniBridge.isTaskRunning() error") 48 | } 49 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativePartialTakeOrderedExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package org.apache.spark.sql.execution.auron.plan

import org.apache.spark.sql.catalyst.expressions.SortOrder
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.metric.SQLMetric

import org.apache.auron.sparkver

/**
 * Case-class shim over [[NativePartialTakeOrderedBase]]: `limit`, `sortOrder`, `child`
 * and `metrics` are forwarded unchanged to the base class.
 *
 * The two child-replacement overrides carry `@sparkver` tags for different Spark
 * version ranges.
 * NOTE(review): presumably only the member matching the Spark version being built
 * survives compilation -- confirm against the `sparkver` annotation.
 */
case class NativePartialTakeOrderedExec(
    limit: Long,
    sortOrder: Seq[SortOrder],
    override val child: SparkPlan,
    override val metrics: Map[String, SQLMetric])
    extends NativePartialTakeOrderedBase(limit, sortOrder, child, metrics) {

  /** Tagged for Spark 3.0 / 3.1: copies this node with the first of `newChildren` as child. */
  @sparkver("3.0 / 3.1")
  override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = {
    val replacement = newChildren.head
    copy(child = replacement)
  }

  /** Tagged for Spark 3.2 - 3.5: copies this node with `newChild` as child. */
  @sparkver("3.2 / 3.3 / 3.4 / 3.5")
  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = {
    copy(child = newChild)
  }
}
package org.apache.auron.flink.table.runtime;

import static org.assertj.core.api.Assertions.assertThat;

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import org.apache.auron.flink.table.AuronFlinkTableTestBase;
import org.apache.flink.types.Row;
import org.apache.flink.util.CollectionUtil;
import org.junit.jupiter.api.Test;

/**
 * IT case for Flink Calc Operator on Auron.
 */
public class AuronFlinkCalcITCase extends AuronFlinkTableTestBase {

    /**
     * Runs {@code select `int` + 1 from T1}, sorts the collected rows by their first
     * field and expects the values 2, 3, 3.
     */
    @Test
    public void testPlus() {
        // The element type must be spelled out: with a raw List the comparator lambda
        // receives an Object and `o.getField(0)` does not compile.
        List<Row> rows = CollectionUtil.iteratorToList(
                tableEnvironment.executeSql("select `int` + 1 from T1").collect());
        // Result order is not guaranteed, so sort before comparing.
        rows.sort(Comparator.comparingInt(o -> (int) o.getField(0)));
        assertThat(rows).isEqualTo(Arrays.asList(Row.of(2), Row.of(3), Row.of(3)));
    }
}
package org.apache.spark.sql.execution.auron.plan

import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.expressions.Generator
import org.apache.spark.sql.execution.SparkPlan

import org.apache.auron.sparkver

/**
 * Case-class shim over [[NativeGenerateBase]]: `generator`, `requiredChildOutput`,
 * `outer`, `generatorOutput` and `child` are forwarded unchanged to the base class.
 *
 * The two child-replacement overrides carry `@sparkver` tags for different Spark
 * version ranges.
 * NOTE(review): presumably only the member matching the Spark version being built
 * survives compilation -- confirm against the `sparkver` annotation.
 */
case class NativeGenerateExec(
    generator: Generator,
    requiredChildOutput: Seq[Attribute],
    outer: Boolean,
    generatorOutput: Seq[Attribute],
    override val child: SparkPlan)
    extends NativeGenerateBase(generator, requiredChildOutput, outer, generatorOutput, child) {

  /** Tagged for Spark 3.0 / 3.1: copies this node with the first of `newChildren` as child. */
  @sparkver("3.0 / 3.1")
  override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = {
    val replacement = newChildren.head
    copy(child = replacement)
  }

  /** Tagged for Spark 3.2 - 3.5: copies this node with `newChild` as child. */
  @sparkver("3.2 / 3.3 / 3.4 / 3.5")
  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = {
    copy(child = newChild)
  }
}
You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | use std::io::Write; 17 | 18 | use auron_jni_bridge::{jni_call, jni_new_direct_byte_buffer}; 19 | use jni::objects::GlobalRef; 20 | 21 | pub struct RssWriter { 22 | rss_partition_writer: GlobalRef, 23 | partition_id: usize, 24 | } 25 | 26 | impl RssWriter { 27 | pub fn new(rss_partition_writer: GlobalRef, partition_id: usize) -> Self { 28 | Self { 29 | rss_partition_writer, 30 | partition_id, 31 | } 32 | } 33 | } 34 | 35 | impl Write for RssWriter { 36 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 37 | let buf_len = buf.len(); 38 | let buf = jni_new_direct_byte_buffer!(&buf)?; 39 | jni_call!( 40 | AuronRssPartitionWriterBase(self.rss_partition_writer.as_obj()) 41 | .write(self.partition_id as i32, buf.as_obj()) -> () 42 | )?; 43 | Ok(buf_len) 44 | } 45 | 46 | fn flush(&mut self) -> std::io::Result<()> { 47 | Ok(()) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /hadoop-shim/src/main/scala/org/apache/auron/hadoop/fs/FSDataOutputWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
package org.apache.auron.hadoop.fs

import java.io.EOFException
import java.nio.ByteBuffer
import java.nio.channels.Channels

import org.apache.hadoop.fs.FSDataOutputStream

/** Closeable sink that writes the full remaining content of a `ByteBuffer`. */
trait FSDataOutputWrapper extends AutoCloseable {

  /** Writes every remaining byte of `buf`. */
  def writeFully(buf: ByteBuffer): Unit
}

object FSDataOutputWrapper {

  /** Wraps `output` in a [[SeekableFSDataOutputWrapper]]. */
  def wrap(output: FSDataOutputStream): FSDataOutputWrapper =
    new SeekableFSDataOutputWrapper(output)
}

/** [[FSDataOutputWrapper]] backed by a Hadoop `FSDataOutputStream`. */
class SeekableFSDataOutputWrapper(output: FSDataOutputStream) extends FSDataOutputWrapper {

  /**
   * Drains `buf` into the stream through a channel created for this call, holding the
   * stream's monitor for the whole operation.
   *
   * Throws `EOFException` if the channel reports `-1` before the buffer is empty.
   */
  override def writeFully(buf: ByteBuffer): Unit = output.synchronized {
    val sink = Channels.newChannel(output)
    while (buf.hasRemaining) {
      val written = sink.write(buf)
      if (written == -1) {
        throw new EOFException("writeFullyToFSDataOutputStream() got unexpected EOF")
      }
    }
  }

  /** Closes the underlying stream. */
  override def close(): Unit = {
    output.close()
  }
}
package org.apache.auron.metric

import scala.collection.JavaConverters._

import org.apache.spark.internal.Logging
import org.apache.spark.sql.execution.metric.SQLMetric

/**
 * [[MetricNode]] that records values into Spark `SQLMetric`s.
 *
 * @param metrics metric name -> Spark metric that receives added values
 * @param children child nodes, also handed to the `MetricNode` constructor as a Java list
 * @param metricValueHandler optional callback invoked with every `(name, value)` pair
 *                           passed to [[add]]
 */
case class SparkMetricNode(
    metrics: Map[String, SQLMetric],
    children: Seq[MetricNode],
    metricValueHandler: Option[(String, Long) => Unit] = None)
    extends MetricNode(children.asJava)
    with Logging {

  /**
   * Returns the `i`-th child, or `null` when `i` is outside `[0, children.length)`.
   *
   * Negative indices used to escape the upper-bound-only check and throw; they are now
   * treated like any other out-of-range index.
   */
  override def getChild(i: Int): MetricNode =
    if (children.isDefinedAt(i)) children(i) else null

  /**
   * Reports value `v` for `metricName`.
   *
   * The handler (if any) sees every value; the Spark metric of the same name is only
   * updated when `v` is strictly positive. Unknown metric names are ignored.
   */
  def add(metricName: String, v: Long): Unit = {
    metricValueHandler.foreach(_.apply(metricName, v))
    if (v > 0) {
      metrics.get(metricName).foreach(_.add(v))
    }
  }

  /**
   * Applies `fn` to this node and then, depth-first, to every descendant.
   *
   * Every child is cast to `SparkMetricNode`, so a child of any other `MetricNode`
   * subtype causes a `ClassCastException`.
   */
  def foreach(fn: SparkMetricNode => Unit): Unit = {
    fn(this)
    this.children.foreach(_.asInstanceOf[SparkMetricNode].foreach(fn))
  }
}
See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | use arrow::array::{Array, UInt64Array}; 17 | use datafusion::{common, common::ScalarValue}; 18 | 19 | use crate::arrow::array_size::ArraySize; 20 | 21 | pub fn compacted_scalar_value_from_array( 22 | array: &dyn Array, 23 | i: usize, 24 | ) -> common::Result { 25 | if array.data_type().is_nested() { 26 | // avoid using sliced nested array for imprecise memory usage 27 | let taken = 28 | arrow::compute::take(array, &UInt64Array::new_scalar(i as u64).into_inner(), None)?; 29 | ScalarValue::try_from_array(&taken, 0) 30 | } else { 31 | ScalarValue::try_from_array(array, i) 32 | } 33 | } 34 | 35 | pub fn scalar_value_heap_mem_size(value: &ScalarValue) -> usize { 36 | match value { 37 | ScalarValue::List(list) => list.as_ref().get_array_mem_size(), 38 | ScalarValue::Map(map) => map.get_array_mem_size(), 39 | ScalarValue::Struct(struct_) => struct_.get_array_mem_size(), 40 | _ => value.size() - size_of::(), 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeParquetSinkExec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache 
package org.apache.spark.sql.execution.auron.plan

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.metric.SQLMetric

import org.apache.auron.sparkver

/**
 * Case-class shim over [[NativeParquetSinkBase]]: `sparkSession`, `table`, `partition`,
 * `child` and `metrics` are forwarded unchanged to the base class.
 *
 * The two child-replacement overrides carry `@sparkver` tags for different Spark
 * version ranges.
 * NOTE(review): presumably only the member matching the Spark version being built
 * survives compilation -- confirm against the `sparkver` annotation.
 */
case class NativeParquetSinkExec(
    sparkSession: SparkSession,
    table: CatalogTable,
    partition: Map[String, Option[String]],
    override val child: SparkPlan,
    override val metrics: Map[String, SQLMetric])
    extends NativeParquetSinkBase(sparkSession, table, partition, child, metrics) {

  /** Tagged for Spark 3.0 / 3.1: copies this node with the first of `newChildren` as child. */
  @sparkver("3.0 / 3.1")
  override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = {
    val replacement = newChildren.head
    copy(child = replacement)
  }

  /** Tagged for Spark 3.2 - 3.5: copies this node with `newChild` as child. */
  @sparkver("3.2 / 3.3 / 3.4 / 3.5")
  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = {
    copy(child = newChild)
  }
}
the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | use datafusion::physical_plan::metrics::{ 17 | Count, ExecutionPlanMetricsSet, Gauge, MetricBuilder, Time, 18 | }; 19 | 20 | #[derive(Clone)] 21 | pub struct SpillMetrics { 22 | pub mem_spill_count: Count, 23 | pub mem_spill_size: Gauge, 24 | pub mem_spill_iotime: Time, 25 | pub disk_spill_size: Gauge, 26 | pub disk_spill_iotime: Time, 27 | } 28 | 29 | impl SpillMetrics { 30 | pub fn new(metrics: &ExecutionPlanMetricsSet, partition: usize) -> Self { 31 | Self { 32 | mem_spill_count: MetricBuilder::new(metrics).counter("mem_spill_count", partition), 33 | mem_spill_size: MetricBuilder::new(metrics).gauge("mem_spill_size", partition), 34 | mem_spill_iotime: MetricBuilder::new(metrics) 35 | .subset_time("mem_spill_iotime", partition), 36 | disk_spill_size: MetricBuilder::new(metrics).gauge("disk_spill_size", partition), 37 | disk_spill_iotime: MetricBuilder::new(metrics) 38 | .subset_time("disk_spill_iotime", partition), 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeWindowExec.scala: 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql.execution.auron.plan

import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.NamedExpression
import org.apache.spark.sql.catalyst.expressions.SortOrder
import org.apache.spark.sql.execution.SparkPlan

import org.apache.auron.sparkver

/**
 * Case-class shim over [[NativeWindowBase]]: `windowExpression`, `partitionSpec`,
 * `orderSpec`, `groupLimit` and `child` are forwarded unchanged to the base class.
 *
 * The two child-replacement overrides carry `@sparkver` tags for different Spark
 * version ranges.
 * NOTE(review): presumably only the member matching the Spark version being built
 * survives compilation -- confirm against the `sparkver` annotation.
 */
case class NativeWindowExec(
    windowExpression: Seq[NamedExpression],
    partitionSpec: Seq[Expression],
    orderSpec: Seq[SortOrder],
    groupLimit: Option[Int],
    override val child: SparkPlan)
    extends NativeWindowBase(windowExpression, partitionSpec, orderSpec, groupLimit, child) {

  /** Tagged for Spark 3.0 / 3.1: copies this node with the first of `newChildren` as child. */
  @sparkver("3.0 / 3.1")
  override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan = {
    val replacement = newChildren.head
    copy(child = replacement)
  }

  /** Tagged for Spark 3.2 - 3.5: copies this node with `newChild` as child. */
  @sparkver("3.2 / 3.3 / 3.4 / 3.5")
  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = {
    copy(child = newChild)
  }
}
/dev/docker-build/docker-compose.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | services: 19 | build-release: 20 | build: ${BUILD_CONTEXT:-./centos7} 21 | volumes: 22 | - ~/.m2:/root/.m2:rw 23 | - ~/.sbt:/root/.sbt:rw 24 | - ~/.cargo/git:/root/.cargo/git:rw 25 | - ~/.cargo/registry:/root/.cargo/registry:rw 26 | - ./../../:/auron:rw 27 | - ./../../target-docker:/auron/target:rw 28 | - ./../../target-docker/spark-extension-target:/auron/spark-extension/target:rw 29 | - ./../../target-docker/spark-extension-shims-spark-target:/auron/spark-extension-shims-spark/target:rw 30 | - ./../../target-docker/build-helper-proto-target:/auron/dev/mvn-build-helper/proto/target:rw 31 | - ./../../target-docker/build-helper-assembly-target:/auron/dev/mvn-build-helper/assembly/target:rw 32 | environment: 33 | RUSTFLAGS: "-C target-cpu=skylake" 34 | AURON_BUILD_ARGS: "${AURON_BUILD_ARGS}" 35 | command: > 36 | bash -c ' 37 | source ~/.bashrc && 38 | cd /auron && 39 | echo "[DOCKER] Running: ./build/mvn $AURON_BUILD_ARGS" && 40 | ./build/mvn $AURON_BUILD_ARGS 41 | ' 42 | 
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Common functions in scripts

# Join the given items with a separator and print the result on one line.
#   $1    separator; only its first character is used because it is assigned to IFS
#   $2..  items to join
join_by() {
  local IFS="$1"
  shift
  # printf rather than echo: a joined string such as "-n" or "-e" would otherwise be
  # taken as an echo option and swallowed.
  printf '%s\n' "$*"
}

# Run a command line inside a working directory, then return to the calling directory.
#   $1  command line; it is word-split on purpose so "prog arg1 arg2" works
#   $2  directory to run in; an empty value means the current directory
# Exits the shell with status 1 when the command cannot be found (status 127).
# Side effect: sets the global _CALLING_DIR to the directory the function was called from.
run_cmd() {
  local command="$1"
  local working_dir="$2"

  # Preserve the calling directory
  _CALLING_DIR="$(pwd)"

  # Run the given command and check if it works well.
  # The directory is quoted so paths containing spaces or glob characters work;
  # ${command} stays unquoted because it carries the program and its arguments.
  cd "${working_dir:-.}" && ${command}
  if [ $? = 127 ]; then
    echo "Cannot run '${command}', so check if the command works"
    exit 1
  fi

  # Reset the current working directory
  cd "${_CALLING_DIR}"
}

# Split input arguments into two parts: Spark confs and args.
# Results are left in two global arrays:
#   SPARK_CONF  every "--master=..." argument, plus one "--conf <value>" element for
#               each "--conf <value>" pair
#   ARGS        every other argument, each kept as exactly one element
parse_args_for_spark_submit() {
  SPARK_CONF=()
  ARGS=()
  # Loop on the argument count, not on "$1" being non-empty, so an empty-string
  # argument does not end parsing and drop everything after it.
  while [ $# -gt 0 ]; do
    if [[ "$1" =~ ^--master= ]]; then
      SPARK_CONF+=("$1")
    elif [ "$1" == "--conf" ]; then
      shift
      # ${1-} keeps a trailing "--conf" without a value from tripping `set -u`.
      SPARK_CONF+=("--conf ${1-}")
    else
      # Quoted so arguments containing spaces or glob characters stay intact.
      ARGS+=("$1")
    fi
    # A trailing "--conf" has already consumed the last argument.
    if [ $# -gt 0 ]; then
      shift
    fi
  done
}
15 | 16 | #![allow(incomplete_features)] 17 | #![allow(internal_features)] 18 | #![feature(adt_const_params)] 19 | #![feature(core_intrinsics)] 20 | #![feature(get_mut_unchecked)] 21 | #![feature(portable_simd)] 22 | #![feature(ptr_as_ref_unchecked)] 23 | 24 | // execution plan implementations 25 | pub mod agg; 26 | pub mod agg_exec; 27 | pub mod broadcast_join_build_hash_map_exec; 28 | pub mod broadcast_join_exec; 29 | pub mod debug_exec; 30 | pub mod empty_partitions_exec; 31 | pub mod expand_exec; 32 | pub mod ffi_reader_exec; 33 | pub mod filter_exec; 34 | pub mod generate_exec; 35 | pub mod ipc_reader_exec; 36 | pub mod ipc_writer_exec; 37 | pub mod joins; 38 | pub mod limit_exec; 39 | pub mod orc_exec; 40 | pub mod parquet_exec; 41 | pub mod parquet_sink_exec; 42 | pub mod project_exec; 43 | pub mod rename_columns_exec; 44 | pub mod rss_shuffle_writer_exec; 45 | pub mod shuffle_writer_exec; 46 | pub mod sort_exec; 47 | pub mod sort_merge_join_exec; 48 | pub mod union_exec; 49 | pub mod window_exec; 50 | 51 | // helper modules 52 | pub mod common; 53 | pub mod generate; 54 | mod scan; 55 | pub mod shuffle; 56 | pub mod window; 57 | -------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/execution/auron/plan/Util.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */
package org.apache.spark.sql.execution.auron.plan

import org.apache.spark.sql.auron.NativeConverters
import org.apache.spark.sql.catalyst.expressions.NamedExpression
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StructType

import org.apache.auron.{protobuf => pb}

/**
 * Helpers for deriving Spark and protobuf schemas from named expressions.
 */
object Util {

  /**
   * Builds a StructType with one field per expression, carrying over each
   * expression's data type, nullability and metadata.
   *
   * @param fieldItems expressions to convert, in output order
   * @param useExprId  when true the field name comes from getFieldNameByExprId,
   *                   otherwise the expression's own name is used
   */
  def getSchema[E <: NamedExpression](
      fieldItems: Seq[E],
      useExprId: Boolean = true): StructType = {
    def fieldNameOf(expr: E): String =
      if (useExprId) getFieldNameByExprId(expr) else expr.name

    val fields = fieldItems.map { expr =>
      StructField(fieldNameOf(expr), expr.dataType, expr.nullable, expr.metadata)
    }
    StructType(fields)
  }

  /** Same as getSchema, with the result passed through NativeConverters.convertSchema. */
  def getNativeSchema[E <: NamedExpression](
      fieldItems: Seq[E],
      useExprId: Boolean = true): pb.Schema = {
    val sparkSchema = getSchema(fieldItems, useExprId)
    NativeConverters.convertSchema(sparkSchema)
  }

  /** Field name for an expression: "#" followed by the numeric expression id. */
  def getFieldNameByExprId(expr: NamedExpression): String =
    s"#${expr.exprId.id}"
}
-------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/hive/auron/HiveUDFUtil.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */
package org.apache.spark.sql.hive.auron

import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.hive.{HiveGenericUDF, HiveSimpleUDF}
import org.apache.spark.sql.hive.HiveUDAFFunction

/**
 * Type tests and accessors for Hive UDF expressions.
 */
object HiveUDFUtil extends Logging {

  /** True for HiveSimpleUDF and HiveGenericUDF expressions (HiveUDAFFunction is not counted). */
  def isHiveUDF(expression: Expression): Boolean =
    expression match {
      case _: HiveSimpleUDF | _: HiveGenericUDF => true
      case _ => false
    }

  /** True when the expression is a HiveSimpleUDF. */
  def isHiveSimpleUDF(expression: Expression): Boolean =
    expression match {
      case _: HiveSimpleUDF => true
      case _ => false
    }

  /** True when the expression is a HiveGenericUDF. */
  def isHiveGenericUDF(expression: Expression): Boolean =
    expression match {
      case _: HiveGenericUDF => true
      case _ => false
    }

  /**
   * Class name of the wrapped Hive function for simple, generic and UDAF
   * expressions; None for every other expression.
   */
  def getFunctionClassName(expression: Expression): Option[String] = {
    val wrapper = expression match {
      case simple: HiveSimpleUDF => Some(simple.funcWrapper)
      case generic: HiveGenericUDF => Some(generic.funcWrapper)
      case udaf: HiveUDAFFunction => Some(udaf.funcWrapper)
      case _ => None
    }
    wrapper.map(_.functionClassName)
  }
}
-------------------------------------------------------------------------------- /hadoop-shim/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 4.0.0 20 | 21 | 22 | org.apache.auron 23 | auron-parent_${scalaVersion} 24 | ${project.version} 25 | ../pom.xml 26 | 
27 | 28 | hadoop-shim_${scalaVersion} 29 | jar 30 | Apache Auron Hadoop Shim ${scalaVersion} 31 | Apache Auron Hadoop Shim Project 32 | 33 | 34 | 35 | org.scala-lang.modules 36 | scala-java8-compat_${scalaVersion} 37 | 38 | 39 | org.apache.hadoop 40 | hadoop-client-api 41 | provided 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /.github/workflows/license.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | name: License check 19 | 20 | # This GitHub workflow checks Apache License v2.0 Header is added if needed 21 | 22 | on: 23 | push: 24 | branches: 25 | - master 26 | - branch-* 27 | pull_request: 28 | branches: 29 | - master 30 | - branch-* 31 | 32 | concurrency: 33 | group: license-${{ github.head_ref || github.run_id }} 34 | cancel-in-progress: true 35 | 36 | jobs: 37 | rat: 38 | name: License 39 | runs-on: ubuntu-24.04 40 | steps: 41 | - uses: actions/checkout@v4 42 | - name: Setup JDK 8 43 | uses: actions/setup-java@v4 44 | with: 45 | distribution: temurin 46 | java-version: 8 47 | cache: 'maven' 48 | - run: | 49 | build/mvn org.apache.rat:apache-rat-plugin:check \ 50 | -Pscala-2.12 -Pspark-3.5 \ 51 | -Pceleborn,celeborn-0.6 \ 52 | -Puniffle,uniffle-0.10 \ 53 | -Ppaimon,paimon-1.2 \ 54 | -Pflink,flink-1.18 55 | - name: Upload rat report 56 | if: failure() 57 | uses: actions/upload-artifact@v4 58 | with: 59 | name: rat-report 60 | path: "**/target/rat*.txt" 61 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | name: Close inactive issues and PRs 19 | on: 20 | schedule: 21 | - cron: "30 1 * * *" 22 | 23 | jobs: 24 | close-issues: 25 | runs-on: ubuntu-24.04 26 | permissions: 27 | issues: write 28 | pull-requests: write 29 | steps: 30 | - uses: actions/stale@v5 31 | with: 32 | days-before-issue-stale: 30 33 | days-before-issue-close: 14 34 | stale-issue-label: "stale" 35 | stale-issue-message: "This issue is stale because it has been open for 30 days with no activity." 36 | close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale." 37 | days-before-pr-stale: 100 38 | days-before-pr-close: 0 39 | stale-pr-message: > 40 | We're closing this PR because it hasn't been updated in a while. 41 | This isn't a judgement on the merit of the PR in any way. It's just 42 | a way of keeping the PR queue manageable. 43 | 44 | If you'd like to revive this PR, please reopen it and ask a 45 | committer to remove the Stale tag! 46 | repo-token: ${{ secrets.GITHUB_TOKEN }} 47 | -------------------------------------------------------------------------------- /auron-core/src/main/java/org/apache/auron/metric/MetricNode.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.auron.metric; 18 | 19 | import java.io.Serializable; 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | 23 | /** 24 | * Abstract class representing a metric node in the Auron system. 25 | * This class provides functionality for hierarchical metrics tracking. 26 | */ 27 | public abstract class MetricNode implements Serializable { 28 | private final List children = new ArrayList<>(); 29 | 30 | public MetricNode(List children) { 31 | this.children.addAll(children); 32 | } 33 | 34 | /** 35 | * Gets a child metric node with the specified ID. 36 | * 37 | * @param id The identifier for the child node 38 | * @return The child MetricNode associated with the given ID 39 | */ 40 | public MetricNode getChild(int id) { 41 | return children.get(id); 42 | } 43 | 44 | /** 45 | * Adds a metric value with a specified name. 46 | * 47 | * @param name The name of the metric 48 | * @param value The value to add for the metric (as a long) 49 | */ 50 | public abstract void add(String name, long value); 51 | } 52 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 
5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | infra: 19 | - ".github/**" 20 | - ".asf.yaml" 21 | - ".rat-excludes" 22 | - ".gitattributes" 23 | - ".gitignore" 24 | - ".gitmodules" 25 | 26 | build: 27 | - "build/**" 28 | - "**/*pom.xml" 29 | - "Cargo.toml" 30 | - "Cargo.lock" 31 | - "auron-build.sh" 32 | - "rust-toolchain.toml" 33 | - "rustfmt.toml" 34 | - "scalafix.conf" 35 | - "scalafmt.conf" 36 | 37 | documentation: 38 | - "**/*.md" 39 | - "LICENSE" 40 | 41 | core: 42 | - "auron-core/**" 43 | 44 | common: 45 | - "common/**" 46 | 47 | native: 48 | - "native-engine/**" 49 | 50 | hadoop: 51 | - "hadoop-shim/**" 52 | 53 | spark: 54 | - "spark-extension/**" 55 | - "spark-extension-shims-spark/**" 56 | - "spark-version-annotation-macros/**" 57 | 58 | spark-ui: 59 | - "auron-spark-ui/**" 60 | 61 | flink: 62 | - "auron-flink-extension/**" 63 | 64 | thirdparty-celeborn: 65 | - "thirdparty/auron-celeborn-*/**" 66 | 67 | thirdparty-uniffle: 68 | - "thirdparty/auron-uniffle/**" 69 | 70 | thirdparty-paimon: 71 | - "thirdparty/auron-paimon/**" 72 | 73 | thirdparty-iceberg: 74 | - "thirdparty/auron-iceberg/**" 75 | 76 | dev-tools: 77 | - "dev/**" 78 | - ".idea/**" 79 | 80 | benchmark: 81 | - "benchmark-results/**" 82 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/execution/BuildInfoInSparkUISuite.scala: 
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql.execution

import java.io.File

import org.apache.spark.SparkConf
import org.apache.spark.sql.AuronQueryTest
import org.apache.spark.sql.execution.ui.AuronSQLAppStatusListener
import org.apache.spark.util.Utils

import org.apache.auron.BaseAuronSQLSuite

/**
 * Checks that exactly one Auron build-info record reaches the
 * AuronSQLAppStatusListener registered on the Spark listener bus.
 */
class BuildInfoInSparkUISuite extends AuronQueryTest with BaseAuronSQLSuite {

  // Temporary event-log directory; created in beforeAll, removed in afterAll.
  var testDir: File = _

  override protected def sparkConf: SparkConf = {
    super.sparkConf.set("spark.eventLog.dir", testDir.toString)
  }

  override protected def beforeAll(): Unit = {
    // Must exist before super.beforeAll(), because sparkConf reads testDir.
    testDir = Utils.createTempDir(namePrefix = "spark-events")
    super.beforeAll()
  }

  override protected def afterAll(): Unit = {
    // Run the parent teardown first so nothing is still using the directory,
    // and always delete the directory even if that teardown throws.
    try {
      super.afterAll()
    } finally {
      Utils.deleteRecursively(testDir)
    }
  }

  test("test build info in spark UI ") {
    val listeners = spark.sparkContext.listenerBus.findListenersByClass[AuronSQLAppStatusListener]
    assert(listeners.size === 1)
    val listener = listeners(0)
    assert(listener.getAuronBuildInfo() == 1)
  }

}
-------------------------------------------------------------------------------- /.github/workflows/celeborn.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | name: Celeborn 19 | 20 | on: 21 | workflow_dispatch: 22 | push: 23 | branches: 24 | - master 25 | - branch-* 26 | pull_request: 27 | branches: 28 | - master 29 | - branch-* 30 | 31 | concurrency: 32 | group: celeborn-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 33 | cancel-in-progress: true 34 | 35 | jobs: 36 | test-celeborn: 37 | strategy: 38 | fail-fast: false 39 | matrix: 40 | include: 41 | - celebornver: "celeborn-0.6" 42 | - celebornver: "celeborn-0.5" 43 | uses: ./.github/workflows/tpcds-reusable.yml 44 | name: Test Celeborn ${{ matrix.celebornver }} 45 | with: 46 | celebornver: ${{ matrix.celebornver }} 47 | sparkver: "spark-3.5" 48 | hadoop-profile: 'hadoop3' 49 | scalaver: "2.12" 50 | extrasparkconf: >- 51 | --conf spark.shuffle.manager=org.apache.spark.sql.execution.auron.shuffle.celeborn.AuronCelebornShuffleManager 52 | --conf spark.serializer=org.apache.spark.serializer.KryoSerializer 53 | --conf spark.celeborn.client.spark.shuffle.writer=hash 54 | --conf spark.celeborn.client.push.replicate.enabled=false 55 | queries: '["q1,q2,q3,q4,q5,q6,q7,q8,q9"]' 56 | -------------------------------------------------------------------------------- /auron-core/src/main/java/org/apache/auron/functions/AuronUDFWrapperContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */
package org.apache.auron.functions;

/**
 * Wrapper context for user-defined functions (UDFs).
 * This class bridges different engines and native UDF implementations.
 * SQL engines such as Spark and Flink should provide their respective implementations based on this.
 */
public interface AuronUDFWrapperContext {

    /**
     * Opens the UDF context with the given resource ID.
     * The Flink engine requires the FunctionContext, which can be obtained via the ResourceID, to initialize the Flink ScalarFunction.
     * The default implementation does nothing.
     *
     * @param resourceId identifier of the resource the implementation may look up while opening
     */
    default void open(String resourceId) {}

    /**
     * Evaluates the UDF with the provided input and output pointers.
     * This method is called for each invocation of the UDF during query execution.
     *
     * @param inputPtr Native pointer to the input data
     * @param outputPtr Native pointer to the output location where results should be written
     */
    void eval(long inputPtr, long outputPtr);

    /**
     * Closes the UDF context.
     * Some UDFs may need to perform resource cleanup operations.
     * The default implementation does nothing.
     */
    default void close() {}
}
-------------------------------------------------------------------------------- /auron-spark-ui/src/main/scala/org/apache/spark/sql/execution/ui/AuronSQLAppStatusListener.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */
package org.apache.spark.sql.execution.ui

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.internal.Logging
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.status.ElementTrackingStore

import org.apache.auron.spark.ui.AuronBuildInfoEvent

/**
 * Spark listener that stores Auron build information in the status store.
 *
 * @param conf    Spark configuration supplied at registration (not read by this class)
 * @param kvstore store that AuronBuildInfoUIData records are written to and counted from
 */
class AuronSQLAppStatusListener(conf: SparkConf, kvstore: ElementTrackingStore)
    extends SparkListener
    with Logging {

  /** Number of AuronBuildInfoUIData records currently held in the store. */
  def getAuronBuildInfo(): Long =
    kvstore.count(classOf[AuronBuildInfoUIData])

  /** Handles AuronBuildInfoEvent; every other event type is ignored. */
  override def onOtherEvent(event: SparkListenerEvent): Unit =
    event match {
      case buildInfo: AuronBuildInfoEvent => onAuronBuildInfo(buildInfo)
      case _ => ()
    }

  /** Wraps the event's info entries in a UI record and writes it to the store. */
  private def onAuronBuildInfo(event: AuronBuildInfoEvent): Unit = {
    val record = new AuronBuildInfoUIData(event.info.toSeq)
    kvstore.write(record)
  }

}

object AuronSQLAppStatusListener {

  /** Creates a listener backed by the context's status store and adds it to the status queue. */
  def register(sc: SparkContext): Unit = {
    val store = sc.statusStore.store.asInstanceOf[ElementTrackingStore]
    val statusListener = new AuronSQLAppStatusListener(sc.conf, store)
    sc.listenerBus.addToStatusQueue(statusListener)
  }
}
-------------------------------------------------------------------------------- /auron-core/src/test/java/org/apache/auron/jni/AuronAdaptorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.auron.jni; 18 | 19 | import static org.junit.jupiter.api.Assertions.assertEquals; 20 | import static org.junit.jupiter.api.Assertions.assertInstanceOf; 21 | import static org.junit.jupiter.api.Assertions.assertNotNull; 22 | 23 | import org.apache.auron.configuration.AuronConfiguration; 24 | import org.junit.jupiter.api.Test; 25 | 26 | /** 27 | * This is a test class for {@link AuronAdaptor}. 28 | */ 29 | public class AuronAdaptorTest { 30 | 31 | @Test 32 | public void testRetrieveConfigWithAuronAdaptor() { 33 | MockAuronAdaptor auronAdaptor = new MockAuronAdaptor(); 34 | assertNotNull(auronAdaptor.getAuronConfiguration()); 35 | AuronConfiguration auronConfig = auronAdaptor.getAuronConfiguration(); 36 | assertEquals(auronConfig.getInteger(AuronConfiguration.BATCH_SIZE), 10000); 37 | assertEquals(auronConfig.getDouble(AuronConfiguration.MEMORY_FRACTION), 0.6, 0.0); 38 | assertEquals(auronConfig.getString(AuronConfiguration.NATIVE_LOG_LEVEL), "info"); 39 | } 40 | 41 | @Test 42 | public void testLoadAuronAdaptorInstanceViaSPI() { 43 | AuronAdaptor adaptor = AuronAdaptor.getInstance(); 44 | assertInstanceOf(MockAuronAdaptor.class, adaptor, "SPI should discover and instantiate MockAuronAdaptor"); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /.github/workflows/uniffle.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor 
license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | name: Uniffle 19 | 20 | on: 21 | workflow_dispatch: 22 | push: 23 | branches: 24 | - master 25 | - branch-* 26 | pull_request: 27 | branches: 28 | - master 29 | - branch-* 30 | 31 | concurrency: 32 | group: uniffle-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 33 | cancel-in-progress: true 34 | 35 | jobs: 36 | test-uniffle: 37 | strategy: 38 | fail-fast: false 39 | matrix: 40 | include: 41 | - unifflever: "uniffle-0.10" 42 | hadoopver: "2.8.5" 43 | uses: ./.github/workflows/tpcds-reusable.yml 44 | name: Test Uniffle ${{ matrix.unifflever }} 45 | with: 46 | sparkver: "spark-3.5" 47 | hadoop-profile: 'hadoop3' 48 | scalaver: "2.12" 49 | hadoopver: ${{ matrix.hadoopver }} 50 | unifflever: ${{ matrix.unifflever }} 51 | extrasparkconf: >- 52 | --conf spark.shuffle.manager=org.apache.spark.sql.execution.auron.shuffle.uniffle.AuronUniffleShuffleManager 53 | --conf spark.serializer=org.apache.spark.serializer.KryoSerializer 54 | --conf spark.rss.coordinator.quorum=localhost:19999 55 | --conf spark.rss.enabled=true 56 | --conf spark.rss.storage.type=MEMORY_LOCALFILE 57 | --conf spark.rss.client.type=GRPC_NETTY 58 | queries: '["q1,q2,q3,q4,q5,q6,q7,q8,q9"]' 59 | 
-------------------------------------------------------------------------------- /dev/reformat: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 4 | # Licensed to the Apache Software Foundation (ASF) under one or more 5 | # contributor license agreements. See the NOTICE file distributed with 6 | # this work for additional information regarding copyright ownership. 7 | # The ASF licenses this file to You under the Apache License, Version 2.0 8 | # (the "License"); you may not use this file except in compliance with 9 | # the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | set -ex 20 | 21 | PROJECT_DIR="$(cd "`dirname "$0"`/.."; pwd)" 22 | 23 | CHECK="false" 24 | while (( "$#" )); do 25 | case $1 in 26 | --check) 27 | CHECK="true" 28 | ;; 29 | esac 30 | shift 31 | done 32 | 33 | MODE=pre 34 | SCALA_PROFILE=scala-2.12 35 | function run_maven() { 36 | if [[ "$CHECK" == "true" ]]; then 37 | "${PROJECT_DIR}"/build/mvn spotless:check compile test-compile scalafix:scalafix -Dscalafix.mode=CHECK -Dscalafix.skipTest=true -DskipBuildNative -P"${MODE}" -P"${SCALA_PROFILE}" "$@" 38 | else 39 | "${PROJECT_DIR}"/build/mvn spotless:apply compile test-compile scalafix:scalafix -DskipBuildNative -P"${MODE}" -P"${SCALA_PROFILE}" "$@" 40 | fi 41 | } 42 | 43 | # check or format the rust code 44 | if [[ "$CHECK" == "true" ]]; then 45 | cargo fmt --check 46 | else 47 | cargo fix --all --allow-dirty --allow-staged --allow-no-vcs 48 | cargo fmt --all -q -- 49 | fi 50 | 51 | # Check or format all code, including third-party code, with spark-3.5 52 | sparkver=spark-3.5 53 | for celebornver in celeborn-0.5 celeborn-0.6 54 | do 55 | run_maven -P"${sparkver}" -Pceleborn,"${celebornver}" -Puniffle,uniffle-0.10 -Ppaimon,paimon-1.2 -Pflink,flink-1.18 -Piceberg,iceberg-1.9 56 | 57 | done 58 | 59 | sparkvers=(spark-3.0 spark-3.1 spark-3.2 spark-3.3 spark-3.4) 60 | for sparkver in "${sparkvers[@]}" 61 | do 62 | run_maven -P"${sparkver}" 63 | done 64 | -------------------------------------------------------------------------------- /native-engine/auron-serde/build.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. 
You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | use std::{fs, path::PathBuf}; 17 | 18 | fn main() -> Result<(), String> { 19 | // for use in docker build where file changes can be wonky 20 | println!("cargo:rerun-if-env-changed=FORCE_REBUILD"); 21 | 22 | println!("cargo:rerun-if-changed=proto/auron.proto"); 23 | 24 | let mut prost_build = tonic_build::Config::new(); 25 | // protobuf-maven-plugin download the protoc executable in the target directory 26 | let protoc_dir_path = "../../dev/mvn-build-helper/proto/target/protoc-plugins"; 27 | let mut protoc_file: Option = None; 28 | if let Ok(entries) = fs::read_dir(protoc_dir_path) { 29 | for entry in entries.filter_map(Result::ok) { 30 | let path = entry.path(); 31 | if path.is_file() 32 | && path 33 | .file_name() 34 | .map(|f| f.to_string_lossy().starts_with("protoc")) 35 | .unwrap_or(false) 36 | { 37 | protoc_file = Some(path); 38 | break; 39 | } 40 | } 41 | } 42 | if let Some(path) = protoc_file { 43 | eprintln!("Using protoc executable: {:?}", path); 44 | prost_build.protoc_executable(path); 45 | } 46 | prost_build 47 | .compile_protos(&["proto/auron.proto"], &["proto"]) 48 | .map_err(|e| format!("protobuf compilation failed: {}", e)) 49 | } 50 | -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/java/org/apache/spark/sql/auron/ForceApplyShuffledHashJoinInjector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * 
contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */
package org.apache.spark.sql.auron;

import static net.bytebuddy.matcher.ElementMatchers.named;

import net.bytebuddy.ByteBuddy;
import net.bytebuddy.agent.ByteBuddyAgent;
import net.bytebuddy.description.type.TypeDescription;
import net.bytebuddy.dynamic.ClassFileLocator;
import net.bytebuddy.dynamic.loading.ClassLoadingStrategy;
import net.bytebuddy.implementation.MethodDelegation;
import net.bytebuddy.pool.TypePool;

/**
 * Redefines Spark's JoinSelectionHelper so that calls to
 * forceApplyShuffledHashJoin are delegated to ForceApplyShuffledHashJoinInterceptor.
 */
public class ForceApplyShuffledHashJoinInjector {

    /**
     * Installs the ByteBuddy agent, then redefines the helper class resolved
     * through the current thread's context class loader and injects the result
     * into that same loader.
     */
    public static void inject() {
        ByteBuddyAgent.install();

        ClassLoader loader = Thread.currentThread().getContextClassLoader();

        // Resolve the target by name from a type pool on the context class loader.
        TypePool pool = TypePool.Default.of(loader);
        TypeDescription target = pool.describe("org.apache.spark.sql.catalyst.optimizer.JoinSelectionHelper")
                .resolve();
        ClassFileLocator locator = ClassFileLocator.ForClassLoader.of(loader);

        new ByteBuddy()
                .redefine(target, locator)
                .method(named("forceApplyShuffledHashJoin"))
                .intercept(MethodDelegation.to(ForceApplyShuffledHashJoinInterceptor.class))
                .make()
                .load(loader, ClassLoadingStrategy.Default.INJECTION);
    }
}
-------------------------------------------------------------------------------- /native-engine/auron/src/metrics.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | use std::sync::Arc; 17 | 18 | use auron_jni_bridge::{jni_call, jni_new_string}; 19 | use datafusion::{common::Result, physical_plan::ExecutionPlan}; 20 | use jni::objects::JObject; 21 | 22 | pub fn update_spark_metric_node( 23 | metric_node: JObject, 24 | execution_plan: Arc, 25 | ) -> Result<()> { 26 | if metric_node.is_null() { 27 | return Ok(()); 28 | } 29 | 30 | // update current node 31 | update_metrics( 32 | metric_node, 33 | &execution_plan 34 | .metrics() 35 | .unwrap_or_default() 36 | .iter() 37 | .map(|m| m.value()) 38 | .map(|m| (m.name(), m.as_usize() as i64)) 39 | .collect::>(), 40 | )?; 41 | 42 | // update children nodes 43 | for (i, &child_plan) in execution_plan.children().iter().enumerate() { 44 | let child_metric_node = jni_call!( 45 | SparkMetricNode(metric_node).getChild(i as i32) -> JObject 46 | )?; 47 | update_spark_metric_node(child_metric_node.as_obj(), child_plan.clone())?; 48 | } 49 | Ok(()) 50 | } 51 | 52 | fn update_metrics(metric_node: JObject, metric_values: &[(&str, i64)]) -> Result<()> { 53 | for &(name, value) in metric_values { 54 | let jname = jni_new_string!(&name)?; 55 | jni_call!(SparkMetricNode(metric_node).add(jname.as_obj(), value) -> ())?; 56 | } 57 | Ok(()) 58 | } 59 | -------------------------------------------------------------------------------- /.github/workflows/flink.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | name: Flink 19 | 20 | on: 21 | workflow_dispatch: 22 | push: 23 | branches: 24 | - master 25 | - branch-* 26 | pull_request: 27 | branches: 28 | - master 29 | - branch-* 30 | 31 | concurrency: 32 | group: flink-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 33 | cancel-in-progress: true 34 | 35 | jobs: 36 | test-flink: 37 | name: Test Flink ${{ matrix.flinkver }} 38 | runs-on: ubuntu-24.04 39 | strategy: 40 | fail-fast: false 41 | matrix: 42 | flinkver: [ "1.18" ] 43 | javaver: [ "8" ] 44 | scalaver: [ "2.12" ] 45 | module: [ "auron-flink-extension" ] 46 | sparkver: [ "spark-3.5" ] 47 | 48 | steps: 49 | - name: Checkout Auron 50 | uses: actions/checkout@v4 51 | 52 | - name: Setup Java and Maven cache 53 | uses: actions/setup-java@v4 54 | with: 55 | distribution: 'adopt-hotspot' 56 | java-version: ${{ matrix.javaver }} 57 | cache: 'maven' 58 | 59 | - name: Test Flink Module 60 | run: ./build/mvn -B test -pl ${{ matrix.module }} -am -Pscala-${{ matrix.scalaver }} -Pflink-${{ matrix.flinkver }} -P${{ matrix.sparkver }} -Prelease 61 | 62 | - name: Upload reports 63 | if: failure() 64 | uses: actions/upload-artifact@v4 65 | with: 66 | name: ${{ matrix.module }}-test-report 67 | path: ${{ matrix.module }}/target/surefire-reports -------------------------------------------------------------------------------- /spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeBroadcastExchangeExec.scala: 
package org.apache.spark.sql.execution.auron.plan

import java.util.UUID

import org.apache.spark.broadcast
import org.apache.spark.sql.auron.NativeSupports
import org.apache.spark.sql.catalyst.plans.logical.Statistics
import org.apache.spark.sql.catalyst.plans.physical.BroadcastMode
import org.apache.spark.sql.execution.SparkPlan

import org.apache.auron.sparkver

/**
 * Concrete broadcast exchange node built on `NativeBroadcastExchangeBase`.
 *
 * @param mode  broadcast mode forwarded to the base class
 * @param child the plan whose output is broadcast
 */
case class NativeBroadcastExchangeExec(mode: BroadcastMode, override val child: SparkPlan)
    extends NativeBroadcastExchangeBase(mode, child)
    with NativeSupports {

  // A fresh random id is generated once per node instance.
  override val getRunId: UUID = UUID.randomUUID()

  /** Builds statistics from the current "dataSize" and "numOutputRows" metric values. */
  override def runtimeStatistics: Statistics = {
    val dataSize = metrics("dataSize").value
    val rowCount = metrics("numOutputRows").value
    Statistics(dataSize, Some(rowCount))
  }

  /** Future backed by `relationFuturePromise`; evaluated lazily and not serialized. */
  @transient
  override lazy val completionFuture: scala.concurrent.Future[broadcast.Broadcast[Any]] = {
    relationFuturePromise.future
  }

  // NOTE(review): presumably @sparkver keeps a member only when building against one of the
  // listed Spark versions, so exactly one of the two overrides below exists per build --
  // confirm against the sparkver macro.
  @sparkver("3.2 / 3.3 / 3.4 / 3.5")
  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan =
    copy(child = newChild)

  @sparkver("3.0 / 3.1")
  override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan =
    copy(child = newChildren.head)
}
-------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | FROM centos:7 19 | 20 | # install common tools 21 | RUN echo "sslverify=false" >> /etc/yum.conf 22 | RUN sed -i "s/mirror.centos.org/vault.centos.org/g" /etc/yum.repos.d/*.repo 23 | RUN sed -i "s/^#.*baseurl=http/baseurl=https/g" /etc/yum.repos.d/*.repo 24 | RUN sed -i "s/^mirrorlist/#mirrorlist/g" /etc/yum.repos.d/*.repo 25 | RUN yum update -y 26 | RUN yum install -y centos-release-scl epel-release 27 | RUN sed -i "s/mirror.centos.org/vault.centos.org/g" /etc/yum.repos.d/*.repo 28 | RUN sed -i "s/^#.*baseurl=http/baseurl=https/g" /etc/yum.repos.d/*.repo 29 | RUN sed -i "s/^mirrorlist/#mirrorlist/g" /etc/yum.repos.d/*.repo 30 | RUN sed -i "s|^metalink=|#metalink=|g" /etc/yum.repos.d/epel.repo 31 | RUN sed -i "s|https://download.fedoraproject.org/pub/epel|https://archives.fedoraproject.org/pub/archive/epel|g" /etc/yum.repos.d/epel.repo 32 | RUN yum clean all && yum makecache 33 | RUN yum install -y libzip unzip openssl-devel 34 | 35 | # install gcc-11 36 | RUN yum install -y devtoolset-11-gcc devtoolset-11-gcc-c++ 37 | RUN echo '. 
/opt/rh/devtoolset-11/enable' >> ~/.bashrc 38 | 39 | # install rust nightly toolchain 40 | RUN curl https://sh.rustup.rs > /rustup-init 41 | RUN chmod +x /rustup-init 42 | RUN /rustup-init -y --default-toolchain nightly-2025-05-09-x86_64-unknown-linux-gnu 43 | 44 | # install java 45 | RUN yum install -y java-1.8.0-openjdk java-1.8.0-openjdk-devel 46 | RUN echo 'export JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk"' >> ~/.bashrc 47 | -------------------------------------------------------------------------------- /.github/workflows/paimon.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | name: Paimon 19 | 20 | on: 21 | workflow_dispatch: 22 | push: 23 | branches: 24 | - master 25 | - branch-* 26 | pull_request: 27 | branches: 28 | - master 29 | - branch-* 30 | 31 | concurrency: 32 | group: paimon-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 33 | cancel-in-progress: true 34 | 35 | jobs: 36 | test-flink: 37 | name: Test Paimon ${{ matrix.paimon }} 38 | runs-on: ubuntu-24.04 39 | strategy: 40 | fail-fast: false 41 | matrix: 42 | paimon: [ "1.2" ] 43 | javaver: [ "8" ] 44 | scalaver: [ "2.12" ] 45 | module: [ "thirdparty/auron-paimon" ] 46 | sparkver: [ "spark-3.5" ] 47 | 48 | steps: 49 | - name: Checkout Auron 50 | uses: actions/checkout@v4 51 | 52 | - name: Setup Java and Maven cache 53 | uses: actions/setup-java@v4 54 | with: 55 | distribution: 'adopt-hotspot' 56 | java-version: ${{ matrix.javaver }} 57 | cache: 'maven' 58 | 59 | - name: Test Paimon Module 60 | run: ./build/mvn -B test -pl ${{ matrix.module }} -am -Pscala-${{ matrix.scalaver }} -Ppaimon-${{ matrix.paimon }} -P${{ matrix.sparkver }} -Prelease 61 | 62 | - name: Upload reports 63 | if: failure() 64 | uses: actions/upload-artifact@v4 65 | with: 66 | name: ${{ matrix.module }}-test-report 67 | path: ${{ matrix.module }}/target/surefire-reports -------------------------------------------------------------------------------- /auron-spark-ui/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 4.0.0 20 | 21 | 22 | org.apache.auron 23 | auron-parent_${scalaVersion} 24 | ${project.version} 25 | ../pom.xml 26 | 27 | 28 | auron-spark-ui_${scalaVersion} 29 | jar 30 | Apache Auron Spark UI ${scalaVersion} 31 | Apache Auron Spark UI Project 32 | 33 | 34 | 35 | org.apache.spark 36 | spark-sql_${scalaVersion} 37 | provided 38 | 39 | 40 | 41 | 42 | 43 | org.apache.maven.plugins 44 | maven-jar-plugin 45 | 46 | 47 | prepare-test-jar 48 | 49 | test-jar 50 | 51 | test-compile 52 | 53 | 54 | 55 | 56 | 
57 | 58 | 59 | -------------------------------------------------------------------------------- /native-engine/datafusion-ext-commons/src/arrow/boolean.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | use arrow::array::{Array, BooleanArray}; 17 | 18 | /// Returns a BooleanArray where nulls are converted to `false` and the result 19 | /// has no null bitmap (all values are valid). 
20 | #[inline] 21 | pub fn nulls_to_false(is_boolean: &BooleanArray) -> BooleanArray { 22 | match is_boolean.nulls() { 23 | Some(nulls) => { 24 | let is_not_null = nulls.inner(); 25 | BooleanArray::new(is_boolean.values() & is_not_null, None) 26 | } 27 | None => is_boolean.clone(), 28 | } 29 | } 30 | 31 | #[cfg(test)] 32 | mod tests { 33 | use arrow::array::{Array, BooleanArray}; 34 | 35 | use super::nulls_to_false; 36 | 37 | #[test] 38 | fn converts_nulls_to_false() { 39 | let input = BooleanArray::from(vec![Some(true), None, Some(false)]); 40 | let output = nulls_to_false(&input); 41 | 42 | assert!(output.nulls().is_none()); 43 | 44 | let got: Vec> = output.iter().collect(); 45 | let expected = vec![Some(true), Some(false), Some(false)]; 46 | assert_eq!(got, expected); 47 | } 48 | 49 | #[test] 50 | fn preserves_when_no_nulls() { 51 | let input = BooleanArray::from(vec![Some(false), Some(true)]); 52 | let output = nulls_to_false(&input); 53 | 54 | assert!(output.nulls().is_none()); 55 | let got: Vec> = output.iter().collect(); 56 | let expected = vec![Some(false), Some(true)]; 57 | assert_eq!(got, expected); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /thirdparty/auron-paimon/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 4.0.0 20 | 21 | 22 | org.apache.auron 23 | auron-parent_${scalaVersion} 24 | ${project.version} 25 | ../../pom.xml 26 | 27 | 28 | auron-paimon_${scalaVersion} 29 | jar 30 | Apache Auron Paimon ${paimonVersion} ${scalaVersion} 31 | 32 | 33 | 34 | org.apache.auron 35 | spark-extension_${scalaVersion} 36 | ${project.version} 37 | 38 | 39 | org.apache.paimon 40 | paimon-core 41 | provided 42 | 43 | 44 | org.apache.spark 45 | spark-sql_${scalaVersion} 46 | provided 47 | 48 | 49 | org.apache.spark 50 | spark-hive_${scalaVersion} 51 | provided 52 | 53 | 54 | 55 | 
-------------------------------------------------------------------------------- /auron-core/src/test/java/org/apache/auron/jni/MockAuronAdaptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.auron.jni; 18 | 19 | import java.io.File; 20 | import java.io.IOException; 21 | import java.nio.ByteBuffer; 22 | import org.apache.auron.configuration.AuronConfiguration; 23 | import org.apache.auron.configuration.MockAuronConfiguration; 24 | import org.apache.auron.functions.AuronUDFWrapperContext; 25 | import org.apache.auron.functions.MockAuronUDFWrapperContext; 26 | 27 | /** 28 | * This is a mock class for testing the AuronAdaptor. 
29 | */ 30 | public class MockAuronAdaptor extends AuronAdaptor { 31 | @Override 32 | public void loadAuronLib() { 33 | // Mock implementation, no need to load auron library 34 | } 35 | 36 | @Override 37 | public String getDirectWriteSpillToDiskFile() throws IOException { 38 | File tempFile = File.createTempFile("auron-spill-", ".tmp"); 39 | tempFile.deleteOnExit(); 40 | return tempFile.getAbsolutePath(); 41 | } 42 | 43 | @Override 44 | public Object getThreadContext() { 45 | return null; 46 | } 47 | 48 | @Override 49 | public void setThreadContext(Object context) { 50 | // Mock implementation, no need to set thread context 51 | } 52 | 53 | @Override 54 | public AuronConfiguration getAuronConfiguration() { 55 | return new MockAuronConfiguration(); 56 | } 57 | 58 | @Override 59 | public AuronUDFWrapperContext getAuronUDFWrapperContext(ByteBuffer udfSerialized) { 60 | return new MockAuronUDFWrapperContext(udfSerialized); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /thirdparty/auron-uniffle/src/main/scala/org/apache/spark/sql/execution/auron/shuffle/uniffle/AuronUniffleShuffleWriter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package org.apache.spark.sql.execution.auron.shuffle.uniffle

import org.apache.spark.internal.Logging
import org.apache.spark.scheduler.MapStatus
import org.apache.spark.shuffle.{ShuffleHandle, ShuffleWriteMetricsReporter}
import org.apache.spark.shuffle.writer.RssShuffleWriter
import org.apache.spark.sql.execution.auron.shuffle.{AuronRssShuffleWriterBase, RssPartitionWriterBase}

/**
 * Shuffle writer that routes partition output through a Uniffle `RssShuffleWriter`.
 *
 * @param rssShuffleWriter the wrapped Uniffle writer
 * @param metrics          shuffle write metrics reporter handed to the base class
 */
class AuronUniffleShuffleWriter[K, V, C](
    rssShuffleWriter: RssShuffleWriter[K, V, C],
    metrics: ShuffleWriteMetricsReporter)
    extends AuronRssShuffleWriterBase[K, V](metrics)
    with Logging {

  /** Creates the partition writer; the handle and map id are not used here. */
  override def getRssPartitionWriter(
      _handle: ShuffleHandle,
      _mapId: Int,
      metrics: ShuffleWriteMetricsReporter,
      numPartitions: Int): RssPartitionWriterBase =
    new UnifflePartitionWriter(numPartitions, metrics, rssShuffleWriter)

  /** Checks the block send result first, then stops the wrapped writer. */
  override def rssStop(success: Boolean): Option[MapStatus] = {
    waitAndCheckBlocksSend()
    logInfo(s"Reporting the shuffle result...")
    rssShuffleWriter.stop(success)
  }

  // `internalCheckBlockSendResult` is looked up with getDeclaredMethod and made accessible
  // before the call, so it is invoked reflectively rather than directly.
  private def waitAndCheckBlocksSend(): Unit = {
    logInfo(s"Waiting all blocks sending to the remote shuffle servers...")
    val checkBlockSendResult =
      rssShuffleWriter.getClass.getDeclaredMethod("internalCheckBlockSendResult")
    checkBlockSendResult.setAccessible(true)
    checkBlockSendResult.invoke(rssShuffleWriter)
  }
}
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.auron.configuration; 18 | 19 | import static org.junit.jupiter.api.Assertions.assertEquals; 20 | 21 | import org.junit.jupiter.api.Test; 22 | 23 | /** Tests for the {@link ConfigOption}. */ 24 | public class ConfigOptionTest { 25 | 26 | @Test 27 | public void testConfigOption() { 28 | ConfigOption keyOption = ConfigOptions.key("key").stringType().noDefaultValue(); 29 | assertEquals("key", keyOption.key()); 30 | assertEquals(null, keyOption.defaultValue()); 31 | assertEquals(false, keyOption.hasDefaultValue()); 32 | ConfigOption booleanOption = 33 | ConfigOptions.key("boolean").booleanType().defaultValue(true); 34 | assertEquals(true, booleanOption.defaultValue()); 35 | } 36 | 37 | @Test 38 | public void testConfigOptionAddDesc() { 39 | ConfigOption keyOption = ConfigOptions.key("key") 40 | .description("this is a description of the key") 41 | .stringType() 42 | .noDefaultValue(); 43 | assertEquals("key", keyOption.key()); 44 | assertEquals(null, keyOption.defaultValue()); 45 | assertEquals(false, keyOption.hasDefaultValue()); 46 | ConfigOption booleanOption = 47 | ConfigOptions.key("boolean").booleanType().defaultValue(true); 48 | assertEquals(true, booleanOption.defaultValue()); 49 | assertEquals("this is a description 
of the key", keyOption.description()); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /auron-spark-ui/src/main/scala/org/apache/spark/sql/execution/ui/AuronAllExecutionsPage.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package org.apache.spark.sql.execution.ui 18 | 19 | import javax.servlet.http.HttpServletRequest 20 | 21 | import scala.xml.{Node, NodeSeq} 22 | 23 | import org.apache.spark.internal.Logging 24 | import org.apache.spark.ui.{UIUtils, WebUIPage} 25 | 26 | private[ui] class AuronAllExecutionsPage(parent: AuronSQLTab) extends WebUIPage("") with Logging { 27 | 28 | private val sqlStore = parent.sqlStore 29 | 30 | override def render(request: HttpServletRequest): Seq[Node] = { 31 | val buildInfo = sqlStore.buildInfo() 32 | val infos = 33 | UIUtils.listingTable(propertyHeader, propertyRow, buildInfo.info, fixedWidth = true) 34 | val summary: NodeSeq = 35 |
36 |
37 | 39 |

40 | 41 | Auron Build Information 42 |

43 |
44 |
45 | {infos} 46 |
47 |
48 |
49 |
50 | 51 | UIUtils.headerSparkPage(request, "Auron", summary, parent) 52 | } 53 | 54 | private def propertyHeader = Seq("Name", "Value") 55 | 56 | private def propertyRow(kv: (String, String)) = 57 | 58 | {kv._1} 59 | 60 | {kv._2} 61 | 62 | 63 | 64 | } 65 | -------------------------------------------------------------------------------- /spark-extension/src/main/scala/org/apache/spark/sql/execution/auron/shuffle/AuronShuffleDependency.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
package org.apache.spark.sql.execution.auron.shuffle

import scala.reflect.ClassTag

import org.apache.spark.{Aggregator, Partitioner, ShuffleDependency, SparkEnv}
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.Serializer
import org.apache.spark.shuffle.{BaseShuffleHandle, ShuffleHandle, ShuffleWriteProcessor}
import org.apache.spark.sql.types.StructType

/**
 * A `ShuffleDependency` that additionally carries the `schema` of the shuffled data.
 * All other constructor arguments are passed through to `ShuffleDependency` unchanged.
 */
class AuronShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag](
    @transient private val _rdd: RDD[_ <: Product2[K, V]],
    override val partitioner: Partitioner,
    override val serializer: Serializer = SparkEnv.get.serializer,
    override val keyOrdering: Option[Ordering[K]] = None,
    override val aggregator: Option[Aggregator[K, V, C]] = None,
    override val mapSideCombine: Boolean = false,
    override val shuffleWriterProcessor: ShuffleWriteProcessor = new ShuffleWriteProcessor,
    val schema: StructType)
    extends ShuffleDependency[K, V, C](
      _rdd,
      partitioner,
      serializer,
      keyOrdering,
      aggregator,
      mapSideCombine,
      shuffleWriterProcessor) {}

object AuronShuffleDependency extends Logging {

  /**
   * Tells whether the dependency behind `handle` is an [[AuronShuffleDependency]].
   * The handle is cast to `BaseShuffleHandle`; any other handle type fails the cast.
   */
  def isArrowShuffle(handle: ShuffleHandle): Boolean =
    handle
      .asInstanceOf[BaseShuffleHandle[_, _, _]]
      .dependency
      .isInstanceOf[AuronShuffleDependency[_, _, _]]
}
4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | use arrow::{ 17 | array::{Array, ArrayData, StructArray}, 18 | buffer::Buffer, 19 | record_batch::RecordBatch, 20 | }; 21 | 22 | // NOTE: 23 | // the official Array::get_array_memory_size() use buffer.capacity() 24 | // which does not work on ffi arrays. we would like to use .len() 25 | // instead for more precise memory statistics. 26 | pub trait ArraySize { 27 | fn get_array_mem_size(&self) -> usize; 28 | } 29 | 30 | impl ArraySize for T { 31 | fn get_array_mem_size(&self) -> usize { 32 | get_array_data_mem_size(&self.to_data()) 33 | } 34 | } 35 | 36 | pub trait BatchSize { 37 | fn get_batch_mem_size(&self) -> usize; 38 | } 39 | 40 | impl BatchSize for RecordBatch { 41 | fn get_batch_mem_size(&self) -> usize { 42 | let as_struct = StructArray::from(self.clone()); 43 | let as_dyn_array: &dyn Array = &as_struct; 44 | as_dyn_array.get_array_mem_size() 45 | } 46 | } 47 | 48 | fn get_array_data_mem_size(array_data: &ArrayData) -> usize { 49 | let mut mem_size = 0; 50 | 51 | for buffer in array_data.buffers() { 52 | mem_size += size_of::() + buffer.len().max(buffer.capacity()); 53 | } 54 | 55 | mem_size += size_of::>(); 56 | mem_size += array_data 57 | .nulls() 58 | .map(|nb| nb.buffer().len().max(nb.buffer().capacity())) 59 | .unwrap_or_default(); 60 | 61 | // summing child data size 62 | for child in array_data.child_data() { 63 | mem_size += 
size_of::() + get_array_data_mem_size(child); 64 | } 65 | mem_size 66 | } 67 | -------------------------------------------------------------------------------- /thirdparty/auron-uniffle/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 4.0.0 20 | 21 | 22 | org.apache.auron 23 | auron-parent_${scalaVersion} 24 | ${project.version} 25 | ../../pom.xml 26 | 27 | 28 | auron-uniffle_${scalaVersion} 29 | jar 30 | Apache Auron Uniffle ${uniffleVersion} ${scalaVersion} 31 | 32 | 33 | 34 | org.apache.auron 35 | spark-extension-shims-spark_${scalaVersion} 36 | ${project.version} 37 | 38 | 39 | org.apache.uniffle 40 | rss-client-spark3-shaded 41 | ${uniffleVersion} 42 | provided 43 | 44 | 45 | org.apache.logging.log4j 46 | log4j-slf4j-impl 47 | 48 | 49 | 50 | 51 | org.apache.spark 52 | spark-core_${scalaVersion} 53 | provided 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /native-engine/auron/src/http/memory_profiling.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one or more 2 | // contributor license agreements. See the NOTICE file distributed with 3 | // this work for additional information regarding copyright ownership. 4 | // The ASF licenses this file to You under the Apache License, Version 2.0 5 | // (the "License"); you may not use this file except in compliance with 6 | // the License. You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | use std::io::{Error, ErrorKind}; 17 | 18 | use log::error; 19 | use poem::{Request, RouteMethod, http::StatusCode}; 20 | 21 | use super::Handler; 22 | 23 | #[cfg(feature = "jemalloc-pprof")] 24 | #[cfg(not(target_env = "msvc"))] 25 | #[allow(non_upper_case_globals)] 26 | #[unsafe(export_name = "malloc_conf")] 27 | pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:19\0"; 28 | 29 | #[poem::handler] 30 | async fn jemalloc_pprof_handler(_: &Request) -> poem::Result> { 31 | let pprof = dump_prof() 32 | .await 33 | .map_err(|e| poem::Error::from_string(e.to_string(), StatusCode::INTERNAL_SERVER_ERROR))?; 34 | Ok(pprof) 35 | } 36 | 37 | async fn dump_prof() -> Result, Error> { 38 | let prof_ctl = jemalloc_pprof::PROF_CTL 39 | .as_ref() 40 | .ok_or_else(|| Error::new(ErrorKind::Other, "JemallocProfCtl not initialized"))?; 41 | let mut prof_ctl = prof_ctl.lock().await; 42 | let pprof = prof_ctl.dump_pprof().map_err(|err| { 43 | error!("Errors on jemalloc profile. err: {:?}", &err); 44 | Error::new(ErrorKind::Other, err) 45 | })?; 46 | Ok(pprof) 47 | } 48 | 49 | pub struct MemoryProfileHandler {} 50 | 51 | impl Default for MemoryProfileHandler { 52 | fn default() -> Self { 53 | MemoryProfileHandler {} 54 | } 55 | } 56 | 57 | impl Handler for MemoryProfileHandler { 58 | fn get_route_method(&self) -> RouteMethod { 59 | RouteMethod::new().get(jemalloc_pprof_handler) 60 | } 61 | 62 | fn get_route_path(&self) -> String { 63 | "/debug/memory/profile".to_string() 64 | } 65 | } 66 | --------------------------------------------------------------------------------