├── docs ├── img │ ├── .gitignore │ ├── app-type-e2e.png │ ├── ubuntu-icon-32.png │ ├── dotnetsparklogo-6.png │ ├── windows-icon-32.png │ ├── databricks-set-jar.PNG │ ├── spark-dot-net-logo.PNG │ ├── app-type-getting-started.png │ ├── .diagrams-source │ │ ├── nuget-logo.png │ │ ├── scala-logo.png │ │ ├── spark-logo.png │ │ └── dotnet-logo.png │ ├── HDI-Spark-Notebooks │ │ ├── run-udf.png │ │ ├── create-df.png │ │ ├── select-host.png │ │ ├── stop-server.png │ │ ├── select-ambari.png │ │ ├── spark-configs.png │ │ ├── add-sparkconfig.png │ │ ├── restart-affected.png │ │ ├── select-livyserver.png │ │ ├── start-sparksession.png │ │ ├── create-sparkdotnet-notebook.png │ │ └── install-notebook-scriptaction.png │ ├── deployment-hdi-action-script.png │ ├── deployment-databricks-init-script.PNG │ ├── Spark-dotnet-sequence-diagram-simple.png │ ├── Spark-dotnet-sequence-diagram-udf-data.png │ └── Spark-dotnet-integration-component-diagram.png ├── features.md ├── coding-guidelines │ └── scala-coding-style.md ├── contributing.md └── release-notes │ └── 0.6 │ └── release-0.6.md ├── eng ├── common │ ├── cross │ │ ├── riscv64 │ │ │ └── sources.list.sid │ │ ├── arm │ │ │ ├── sources.list.jessie │ │ │ ├── tizen │ │ │ │ └── tizen.patch │ │ │ ├── sources.list.focal │ │ │ ├── sources.list.jammy │ │ │ ├── sources.list.zesty │ │ │ ├── sources.list.bionic │ │ │ └── sources.list.xenial │ │ ├── armel │ │ │ ├── sources.list.jessie │ │ │ └── tizen │ │ │ │ └── tizen.patch │ │ ├── armv6 │ │ │ └── sources.list.buster │ │ ├── arm64 │ │ │ ├── sources.list.buster │ │ │ ├── tizen │ │ │ │ └── tizen.patch │ │ │ ├── sources.list.stretch │ │ │ ├── sources.list.focal │ │ │ ├── sources.list.jammy │ │ │ ├── sources.list.zesty │ │ │ ├── sources.list.bionic │ │ │ └── sources.list.xenial │ │ ├── s390x │ │ │ └── sources.list.bionic │ │ └── ppc64le │ │ │ └── sources.list.bionic │ ├── BuildConfiguration │ │ └── build-configuration.json │ ├── dotnet-install.cmd │ ├── CIBuild.cmd │ ├── sdl │ │ ├── packages.config │ │ └── NuGet.config │ ├── init-tools-native.cmd │ ├── templates │ │ ├── steps │ │ │ ├── run-on-unix.yml │ │ │ ├── run-on-windows.yml │ │ │ ├── add-build-to-channel.yml │ │ │ ├── build-reason.yml │ │ │ ├── component-governance.yml │ │ │ ├── publish-logs.yml │ │ │ ├── enable-internal-runtimes.yml │ │ │ ├── run-script-ifequalelse.yml │ │ │ └── retain-build.yml │ │ ├── variables │ │ │ └── sdl-variables.yml │ │ ├── post-build │ │ │ ├── trigger-subscription.yml │ │ │ └── common-variables.yml │ │ └── jobs │ │ │ └── codeql-build.yml │ ├── internal │ │ ├── Directory.Build.props │ │ └── NuGet.config │ ├── templates-official │ │ ├── variables │ │ │ └── sdl-variables.yml │ │ ├── steps │ │ │ ├── add-build-to-channel.yml │ │ │ ├── build-reason.yml │ │ │ ├── component-governance.yml │ │ │ ├── publish-logs.yml │ │ │ ├── enable-internal-runtimes.yml │ │ │ └── retain-build.yml │ │ └── post-build │ │ │ ├── trigger-subscription.yml │ │ │ └── common-variables.yml │ ├── PSScriptAnalyzerSettings.psd1 │ ├── cibuild.sh │ ├── enable-cross-org-publishing.ps1 │ ├── msbuild.ps1 │ ├── generate-sbom-prep.ps1 │ ├── helixpublish.proj │ ├── dotnet-install.ps1 │ └── post-build │ │ └── nuget-validation.ps1 ├── Versions.props ├── Build.props ├── Version.Details.xml ├── Publishing.props ├── Signing.props ├── PackageWorker.proj └── AfterSolutionBuild.targets ├── src ├── csharp │ ├── Microsoft.Spark.E2ETest │ │ ├── Resources │ │ │ ├── people.txt │ │ │ ├── people.csv │ │ │ ├── users.orc │ │ │ ├── archive.zip │ │ │ ├── users.parquet │ │ │ ├── employees.json │ │ │ ├── 
more_people.json │ │ │ ├── log4j.properties │ │ │ └── people.json │ │ ├── IpcTests │ │ │ ├── SparkFilesTests.cs │ │ │ ├── Sql │ │ │ │ └── Streaming │ │ │ │ │ └── TriggerTests.cs │ │ │ ├── SerDeTests.cs │ │ │ ├── JvmBridgeTests.cs │ │ │ └── ML │ │ │ │ └── Param │ │ │ │ └── ParamTests.cs │ │ ├── Microsoft.Spark.E2ETest.csproj │ │ ├── TestEnvironment.cs │ │ └── Utils │ │ │ └── SQLUtils.cs │ ├── Microsoft.Spark.E2ETest.ExternalLibrary │ │ ├── Microsoft.Spark.E2ETest.ExternalLibrary.csproj │ │ └── ExternalClass.cs │ ├── Microsoft.Spark │ │ ├── build │ │ │ └── netstandard2.0 │ │ │ │ └── Microsoft.Spark.targets │ │ ├── Constants.cs │ │ ├── Interop │ │ │ ├── Ipc │ │ │ │ ├── IJvmBridgeFactory.cs │ │ │ │ ├── JvmBridgeFactory.cs │ │ │ │ ├── ICallbackHandler.cs │ │ │ │ └── ForeachBatchCallbackHandler.cs │ │ │ └── Internal │ │ │ │ └── Dotnet │ │ │ │ └── ArrayExtensions.cs │ │ ├── ML │ │ │ ├── Util │ │ │ │ └── Identifiable.cs │ │ │ └── Param │ │ │ │ └── ParamPair.cs │ │ ├── JvmException.cs │ │ ├── Utils │ │ │ ├── CollectionUtils.cs │ │ │ └── EnvironmentUtils.cs │ │ ├── Sql │ │ │ ├── Streaming │ │ │ │ └── StreamingQueryException.cs │ │ │ ├── ArrowGroupedMapUdfWrapper.cs │ │ │ └── DataFrameGroupedMapUdfWrapper.cs │ │ ├── Hadoop │ │ │ └── Conf │ │ │ │ └── Configuration.cs │ │ ├── Versions.cs │ │ └── Network │ │ │ └── SocketFactory.cs │ ├── Directory.Build.targets │ ├── Extensions │ │ ├── Microsoft.Spark.Extensions.Hyperspace │ │ │ ├── Microsoft.Spark.Extensions.Hyperspace.csproj │ │ │ ├── HyperspaceVersions.cs │ │ │ └── Attributes.cs │ │ ├── Microsoft.Spark.Extensions.Delta.E2ETest │ │ │ ├── Constants.cs │ │ │ └── Microsoft.Spark.Extensions.Delta.E2ETest.csproj │ │ ├── Microsoft.Spark.Extensions.Hyperspace.E2ETest │ │ │ ├── Constants.cs │ │ │ └── Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj │ │ ├── Microsoft.Spark.Extensions.DotNet.Interactive │ │ │ └── ResolvedNugetPackage.cs │ │ ├── Microsoft.Spark.Extensions.Delta │ │ │ ├── Microsoft.Spark.Extensions.Delta.csproj │ │ │ ├── DeltaLakeVersions.cs │ │ │ └── Attributes.cs │ │ ├── README.md │ │ └── Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest │ │ │ └── Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj │ ├── Microsoft.Spark.UnitTest │ │ ├── Microsoft.Spark.UnitTest.csproj │ │ ├── CollectionUtilsTests.cs │ │ └── TestUtils │ │ │ └── XunitConsoleOutHelper.cs │ ├── Microsoft.Spark.Worker │ │ ├── Utils │ │ │ ├── DateTimeExtension.cs │ │ │ └── SettingUtils.cs │ │ └── Microsoft.Spark.Worker.csproj │ └── Microsoft.Spark.Worker.UnitTest │ │ └── Microsoft.Spark.Worker.UnitTest.csproj └── scala │ ├── microsoft-spark-2-4 │ └── src │ │ ├── main │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── spark │ │ │ ├── api │ │ │ └── dotnet │ │ │ │ ├── DotnetException.scala │ │ │ │ ├── JvmBridgeUtils.scala │ │ │ │ └── DotnetRDD.scala │ │ │ ├── deploy │ │ │ └── dotnet │ │ │ │ └── DotNetUserAppException.scala │ │ │ ├── mllib │ │ │ └── api │ │ │ │ └── dotnet │ │ │ │ └── MLUtils.scala │ │ │ ├── internal │ │ │ └── config │ │ │ │ └── dotnet │ │ │ │ └── Dotnet.scala │ │ │ └── sql │ │ │ ├── test │ │ │ └── TestUtils.scala │ │ │ └── api │ │ │ └── dotnet │ │ │ └── SQLUtils.scala │ │ └── test │ │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── api │ │ └── dotnet │ │ ├── Extensions.scala │ │ ├── JVMObjectTrackerTest.scala │ │ └── DotnetBackendTest.scala │ ├── microsoft-spark-3-0 │ └── src │ │ ├── main │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── spark │ │ │ ├── api │ │ │ └── dotnet │ │ │ │ ├── DotnetException.scala │ │ │ │ ├── 
JvmBridgeUtils.scala │ │ │ │ └── DotnetRDD.scala │ │ │ ├── deploy │ │ │ └── dotnet │ │ │ │ └── DotNetUserAppException.scala │ │ │ ├── mllib │ │ │ └── api │ │ │ │ └── dotnet │ │ │ │ └── MLUtils.scala │ │ │ ├── internal │ │ │ └── config │ │ │ │ └── dotnet │ │ │ │ └── Dotnet.scala │ │ │ └── sql │ │ │ ├── test │ │ │ └── TestUtils.scala │ │ │ └── api │ │ │ └── dotnet │ │ │ ├── SQLUtils.scala │ │ │ └── DotnetForeachBatch.scala │ │ └── test │ │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── api │ │ └── dotnet │ │ ├── Extensions.scala │ │ ├── DotnetBackendTest.scala │ │ └── JVMObjectTrackerTest.scala │ ├── microsoft-spark-3-1 │ └── src │ │ ├── main │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── spark │ │ │ ├── api │ │ │ └── dotnet │ │ │ │ ├── DotnetException.scala │ │ │ │ ├── JvmBridgeUtils.scala │ │ │ │ └── DotnetRDD.scala │ │ │ ├── deploy │ │ │ └── dotnet │ │ │ │ └── DotNetUserAppException.scala │ │ │ ├── mllib │ │ │ └── api │ │ │ │ └── dotnet │ │ │ │ └── MLUtils.scala │ │ │ ├── internal │ │ │ └── config │ │ │ │ └── dotnet │ │ │ │ └── Dotnet.scala │ │ │ └── sql │ │ │ ├── test │ │ │ └── TestUtils.scala │ │ │ └── api │ │ │ └── dotnet │ │ │ ├── SQLUtils.scala │ │ │ └── DotnetForeachBatch.scala │ │ └── test │ │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── api │ │ └── dotnet │ │ ├── Extensions.scala │ │ ├── DotnetBackendTest.scala │ │ └── JVMObjectTrackerTest.scala │ ├── microsoft-spark-3-2 │ └── src │ │ ├── main │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── spark │ │ │ ├── api │ │ │ └── dotnet │ │ │ │ ├── DotnetException.scala │ │ │ │ ├── JvmBridgeUtils.scala │ │ │ │ └── DotnetRDD.scala │ │ │ ├── deploy │ │ │ └── dotnet │ │ │ │ └── DotNetUserAppException.scala │ │ │ ├── mllib │ │ │ └── api │ │ │ │ └── dotnet │ │ │ │ └── MLUtils.scala │ │ │ ├── internal │ │ │ └── config │ │ │ │ └── dotnet │ │ │ │ └── Dotnet.scala │ │ │ └── sql │ │ │ ├── test │ │ │ └── TestUtils.scala │ │ │ └── api │ │ │ └── dotnet │ │ │ ├── SQLUtils.scala │ │ │ └── DotnetForeachBatch.scala │ │ └── test │ │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── api │ │ └── dotnet │ │ ├── Extensions.scala │ │ ├── DotnetBackendTest.scala │ │ └── JVMObjectTrackerTest.scala │ ├── microsoft-spark-3-3 │ └── src │ │ ├── main │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── spark │ │ │ ├── api │ │ │ └── dotnet │ │ │ │ ├── DotnetException.scala │ │ │ │ ├── JvmBridgeUtils.scala │ │ │ │ └── DotnetRDD.scala │ │ │ ├── deploy │ │ │ └── dotnet │ │ │ │ └── DotNetUserAppException.scala │ │ │ ├── mllib │ │ │ └── api │ │ │ │ └── dotnet │ │ │ │ └── MLUtils.scala │ │ │ ├── internal │ │ │ └── config │ │ │ │ └── dotnet │ │ │ │ └── Dotnet.scala │ │ │ └── sql │ │ │ ├── test │ │ │ └── TestUtils.scala │ │ │ └── api │ │ │ └── dotnet │ │ │ ├── SQLUtils.scala │ │ │ └── DotnetForeachBatch.scala │ │ └── test │ │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── api │ │ └── dotnet │ │ ├── Extensions.scala │ │ ├── DotnetBackendTest.scala │ │ └── JVMObjectTrackerTest.scala │ ├── microsoft-spark-3-4 │ └── src │ │ ├── main │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── spark │ │ │ ├── api │ │ │ └── dotnet │ │ │ │ ├── DotnetException.scala │ │ │ │ ├── JvmBridgeUtils.scala │ │ │ │ └── DotnetRDD.scala │ │ │ ├── deploy │ │ │ └── dotnet │ │ │ │ └── DotNetUserAppException.scala │ │ │ ├── mllib │ │ │ └── api │ │ │ │ └── dotnet │ │ │ │ └── MLUtils.scala │ │ │ ├── internal │ │ │ └── config │ │ │ │ └── dotnet │ │ │ │ └── Dotnet.scala │ │ │ └── sql │ │ │ └── test │ │ │ └── TestUtils.scala │ │ └── test │ │ └── 
scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── api │ │ └── dotnet │ │ ├── Extensions.scala │ │ ├── DotnetBackendTest.scala │ │ └── JVMObjectTrackerTest.scala │ ├── microsoft-spark-3-5 │ └── src │ │ ├── main │ │ └── scala │ │ │ └── org │ │ │ └── apache │ │ │ └── spark │ │ │ ├── api │ │ │ └── dotnet │ │ │ │ ├── DotnetException.scala │ │ │ │ ├── JvmBridgeUtils.scala │ │ │ │ └── DotnetRDD.scala │ │ │ ├── deploy │ │ │ └── dotnet │ │ │ │ └── DotNetUserAppException.scala │ │ │ ├── mllib │ │ │ └── api │ │ │ │ └── dotnet │ │ │ │ └── MLUtils.scala │ │ │ ├── internal │ │ │ └── config │ │ │ │ └── dotnet │ │ │ │ └── Dotnet.scala │ │ │ └── sql │ │ │ └── test │ │ │ └── TestUtils.scala │ │ └── test │ │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── api │ │ └── dotnet │ │ ├── Extensions.scala │ │ ├── DotnetBackendTest.scala │ │ └── JVMObjectTrackerTest.scala │ └── pom.xml ├── examples ├── Directory.Build.targets ├── Microsoft.Spark.CSharp.Examples │ ├── MachineLearning │ │ └── Sentiment │ │ │ └── Resources │ │ │ └── MLModel.zip │ ├── IExample.cs │ ├── Microsoft.Spark.CSharp.Examples.csproj │ └── Sql │ │ └── README.md ├── Microsoft.Spark.FSharp.Examples │ ├── IExample.fs │ └── Microsoft.Spark.FSharp.Examples.fsproj ├── Directory.Build.props └── README.md ├── benchmark ├── csharp │ ├── Directory.Build.targets │ ├── Directory.Build.props │ └── Tpch │ │ ├── StringExtensions.cs │ │ ├── Tpch.csproj │ │ ├── VectorDataFrameFunctions.cs │ │ └── TpchBase.cs ├── run_scala_benchmark.sh ├── run_python_benchmark.sh ├── run_csharp_benchmark.sh ├── python │ └── tpch_base.py └── scala │ └── src │ └── main │ └── scala │ └── com │ └── microsoft │ └── tpch │ └── TpchBase.scala ├── CODE_OF_CONDUCT.md ├── global.json ├── .gitattributes ├── THIRD-PARTY-NOTICES.TXT ├── dev └── .scalafmt.conf ├── .config └── 1espt │ └── PipelineAutobaseliningConfig.yml ├── NuGet.config ├── PULL_REQUEST_TEMPLATE.md ├── .github └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── script └── package-worker.ps1 └── LICENSE /docs/img/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/features.md: -------------------------------------------------------------------------------- 1 | # Features 2 | -------------------------------------------------------------------------------- /eng/common/cross/riscv64/sources.list.sid: -------------------------------------------------------------------------------- 1 | deb http://deb.debian.org/debian-ports sid main 2 | -------------------------------------------------------------------------------- /docs/img/app-type-e2e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/app-type-e2e.png -------------------------------------------------------------------------------- /docs/img/ubuntu-icon-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/ubuntu-icon-32.png -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Resources/people.txt: -------------------------------------------------------------------------------- 1 | Michael, 29 2 | Andy, 30 3 | Justin, 19 4 | -------------------------------------------------------------------------------- /docs/img/dotnetsparklogo-6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/dotnetsparklogo-6.png -------------------------------------------------------------------------------- /docs/img/windows-icon-32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/windows-icon-32.png -------------------------------------------------------------------------------- /docs/img/databricks-set-jar.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/databricks-set-jar.PNG -------------------------------------------------------------------------------- /docs/img/spark-dot-net-logo.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/spark-dot-net-logo.PNG -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Resources/people.csv: -------------------------------------------------------------------------------- 1 | name;age;job 2 | Jorge;30;Developer 3 | Bob;32;Developer 4 | -------------------------------------------------------------------------------- /docs/img/app-type-getting-started.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/app-type-getting-started.png -------------------------------------------------------------------------------- /docs/img/.diagrams-source/nuget-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/.diagrams-source/nuget-logo.png -------------------------------------------------------------------------------- /docs/img/.diagrams-source/scala-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/.diagrams-source/scala-logo.png -------------------------------------------------------------------------------- /docs/img/.diagrams-source/spark-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/.diagrams-source/spark-logo.png -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/run-udf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/run-udf.png -------------------------------------------------------------------------------- /eng/common/BuildConfiguration/build-configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "RetryCountLimit": 1, 3 | "RetryByAnyError": false 4 | } 5 | -------------------------------------------------------------------------------- /docs/img/.diagrams-source/dotnet-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/.diagrams-source/dotnet-logo.png -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/create-df.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/create-df.png -------------------------------------------------------------------------------- /docs/img/deployment-hdi-action-script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/deployment-hdi-action-script.png -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/select-host.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/select-host.png -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/stop-server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/stop-server.png -------------------------------------------------------------------------------- /eng/common/dotnet-install.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | powershell -ExecutionPolicy ByPass -NoProfile -command "& """%~dp0dotnet-install.ps1""" %*" -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/select-ambari.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/select-ambari.png -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/spark-configs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/spark-configs.png -------------------------------------------------------------------------------- /docs/img/deployment-databricks-init-script.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/deployment-databricks-init-script.PNG -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/add-sparkconfig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/add-sparkconfig.png -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/restart-affected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/restart-affected.png -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/select-livyserver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/select-livyserver.png -------------------------------------------------------------------------------- /docs/img/Spark-dotnet-sequence-diagram-simple.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/Spark-dotnet-sequence-diagram-simple.png -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/start-sparksession.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/start-sparksession.png -------------------------------------------------------------------------------- /docs/img/Spark-dotnet-sequence-diagram-udf-data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/Spark-dotnet-sequence-diagram-udf-data.png -------------------------------------------------------------------------------- /docs/img/Spark-dotnet-integration-component-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/Spark-dotnet-integration-component-diagram.png -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Resources/users.orc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/src/csharp/Microsoft.Spark.E2ETest/Resources/users.orc -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Resources/archive.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/src/csharp/Microsoft.Spark.E2ETest/Resources/archive.zip -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/create-sparkdotnet-notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/create-sparkdotnet-notebook.png -------------------------------------------------------------------------------- /eng/common/CIBuild.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | powershell -ExecutionPolicy ByPass -NoProfile -command "& """%~dp0Build.ps1""" -restore -build -test -sign -pack -publish -ci %*" -------------------------------------------------------------------------------- /eng/common/sdl/packages.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Resources/users.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/src/csharp/Microsoft.Spark.E2ETest/Resources/users.parquet -------------------------------------------------------------------------------- /docs/img/HDI-Spark-Notebooks/install-notebook-scriptaction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/docs/img/HDI-Spark-Notebooks/install-notebook-scriptaction.png -------------------------------------------------------------------------------- /eng/Versions.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 2.3.0 5 | 6 | 7 | 
-------------------------------------------------------------------------------- /eng/common/init-tools-native.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | powershell -NoProfile -NoLogo -ExecutionPolicy ByPass -command "& """%~dp0init-tools-native.ps1""" %*" 3 | exit /b %ErrorLevel% -------------------------------------------------------------------------------- /examples/Directory.Build.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /benchmark/csharp/Directory.Build.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /eng/common/templates/steps/run-on-unix.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | agentOs: '' 3 | steps: [] 4 | 5 | steps: 6 | - ${{ if ne(parameters.agentOs, 'Windows_NT') }}: 7 | - ${{ parameters.steps }} 8 | -------------------------------------------------------------------------------- /eng/common/templates/steps/run-on-windows.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | agentOs: '' 3 | steps: [] 4 | 5 | steps: 6 | - ${{ if eq(parameters.agentOs, 'Windows_NT') }}: 7 | - ${{ parameters.steps }} 8 | -------------------------------------------------------------------------------- /eng/common/cross/arm/sources.list.jessie: -------------------------------------------------------------------------------- 1 | # Debian (sid) # UNSTABLE 2 | deb http://ftp.debian.org/debian/ sid main contrib non-free 3 | deb-src http://ftp.debian.org/debian/ sid main contrib non-free 4 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Resources/employees.json: -------------------------------------------------------------------------------- 1 | {"name":"Michael", "salary":3000} 2 | {"name":"Andy", "salary":4500} 3 | {"name":"Justin", "salary":3500} 4 | {"name":"Berta", "salary":4000} 5 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Resources/more_people.json: -------------------------------------------------------------------------------- 1 | {"name":"Michael"} 2 | {"name":"Andy", "age":30} 3 | {"name":"Seth", "age":30} 4 | {"name":"Justin", "age":19} 5 | {"name":"Kathy", "age":19} 6 | -------------------------------------------------------------------------------- /eng/common/cross/armel/sources.list.jessie: -------------------------------------------------------------------------------- 1 | # Debian (jessie) # Stable 2 | deb http://ftp.debian.org/debian/ jessie main contrib non-free 3 | deb-src http://ftp.debian.org/debian/ jessie main contrib non-free 4 | -------------------------------------------------------------------------------- /eng/common/cross/armv6/sources.list.buster: -------------------------------------------------------------------------------- 1 | deb http://raspbian.raspberrypi.org/raspbian/ buster main contrib non-free rpi 2 | deb-src http://raspbian.raspberrypi.org/raspbian/ buster main contrib non-free rpi 3 | -------------------------------------------------------------------------------- /examples/Microsoft.Spark.CSharp.Examples/MachineLearning/Sentiment/Resources/MLModel.zip: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dotnet/spark/HEAD/examples/Microsoft.Spark.CSharp.Examples/MachineLearning/Sentiment/Resources/MLModel.zip -------------------------------------------------------------------------------- /eng/common/internal/Directory.Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | This project has adopted the code of conduct defined by the [Contributor Covenant](https://contributor-covenant.org/) to clarify expected behavior in our community. 2 | For more information, see the [.NET Foundation Code of Conduct](https://dotnetfoundation.org/code-of-conduct). 3 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest.ExternalLibrary/Microsoft.Spark.E2ETest.ExternalLibrary.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0 5 | false 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /eng/Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /eng/common/internal/NuGet.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=ERROR,console 3 | # Use NullAppender for E2E testing. There is a deadlock issue using ConsoleAppender when the JVM process is launched from the C# process. 4 | log4j.appender.console=org.apache.log4j.varia.NullAppender 5 | -------------------------------------------------------------------------------- /eng/common/templates/variables/sdl-variables.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | # The Guardian version specified in 'eng/common/sdl/packages.config'. This value must be kept in 3 | # sync with the packages.config file. 4 | - name: DefaultGuardianVersion 5 | value: 0.109.0 6 | - name: GuardianPackagesConfigFile 7 | value: $(Build.SourcesDirectory)\eng\common\sdl\packages.config -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/build/netstandard2.0/Microsoft.Spark.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | PreserveNewest 5 | 6 | 7 | -------------------------------------------------------------------------------- /eng/common/templates-official/variables/sdl-variables.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | # The Guardian version specified in 'eng/common/sdl/packages.config'. This value must be kept in 3 | # sync with the packages.config file. 
4 | - name: DefaultGuardianVersion 5 | value: 0.109.0 6 | - name: GuardianPackagesConfigFile 7 | value: $(Build.SourcesDirectory)\eng\common\sdl\packages.config -------------------------------------------------------------------------------- /global.json: -------------------------------------------------------------------------------- 1 | { 2 | "sdk": { 3 | "version": "8.0.404", 4 | "rollForward": "latestFeature" 5 | }, 6 | "tools": { 7 | "dotnet": "8.0.404" 8 | }, 9 | "msbuild-sdks": { 10 | "Microsoft.DotNet.Arcade.Sdk": "8.0.0-beta.24359.3", 11 | "Microsoft.DotNet.Helix.Sdk": "8.0.0-beta.24359.3", 12 | "Microsoft.Build.NoTargets": "3.7.0" 13 | } 14 | } -------------------------------------------------------------------------------- /examples/Microsoft.Spark.FSharp.Examples/IExample.fs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark.Examples 6 | 7 | type IExample = 8 | abstract member Run : string[] -> int 9 | -------------------------------------------------------------------------------- /examples/Directory.Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | false 7 | 8 | 9 | -------------------------------------------------------------------------------- /benchmark/csharp/Directory.Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | false 7 | 8 | 9 | -------------------------------------------------------------------------------- /examples/Microsoft.Spark.CSharp.Examples/IExample.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark.Examples 6 | { 7 | internal interface IExample 8 | { 9 | void Run(string[] args); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | # Force bash scripts to always use lf line endings so that if a repo is accessed 7 | # in Unix via a file share from Windows, the scripts will work. 8 | *.sh text eol=lf 9 | -------------------------------------------------------------------------------- /THIRD-PARTY-NOTICES.TXT: -------------------------------------------------------------------------------- 1 | .NET for Apache Spark uses third-party libraries or other resources that may be 2 | distributed under licenses different than the .NET for Apache Spark software. 3 | 4 | In the event that we accidentally failed to list a required notice, please 5 | bring it to our attention. Post an issue or email us: 6 | 7 | dotnet@microsoft.com 8 | 9 | The attached notices are provided for information only. 
10 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Constants.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark 6 | { 7 | internal class Constants 8 | { 9 | internal const string RunningREPLEnvVar = "DOTNET_SPARK_RUNNING_REPL"; 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Interop/Ipc/IJvmBridgeFactory.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark.Interop.Ipc 6 | { 7 | internal interface IJvmBridgeFactory 8 | { 9 | IJvmBridge Create(int portNumber); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /docs/coding-guidelines/scala-coding-style.md: -------------------------------------------------------------------------------- 1 | Scala Coding Style 2 | =============== 3 | 4 | * For Scala code, we follow the official [Scala style guide](https://docs.scala-lang.org/style/). 5 | * For formatting, [scalafmt](https://scalameta.org/scalafmt) is used with the custom configuration (found in [/dev/.scalafmt.conf](/dev/.scalafmt.conf)) 6 | * Installation of `scalafmt` can be found [here](https://scalameta.org/scalafmt/docs/installation.html) 7 | -------------------------------------------------------------------------------- /src/csharp/Directory.Build.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | all 7 | runtime; build; native; contentfiles; analyzers 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /dev/.scalafmt.conf: -------------------------------------------------------------------------------- 1 | # The following configs are taken from https://github.com/apache/spark/blob/master/dev/.scalafmt.conf 2 | align = none 3 | align.openParenDefnSite = false 4 | align.openParenCallSite = false 5 | align.tokens = [] 6 | optIn = { 7 | configStyleArguments = false 8 | } 9 | danglingParentheses = false 10 | docstrings = JavaDoc 11 | maxColumn = 98 12 | 13 | # The following are specific to dotnet/spark. 
14 | importSelectors = singleLine 15 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Resources/people.json: -------------------------------------------------------------------------------- 1 | {"name":"Michael", "ids":[1], "info1":{"city":"Burdwan"}, "info2":{"state":"Paschimbanga"}, "info3":{"company":{"job":"Developer"}}}" 2 | {"name":"Andy", "age":30, "ids":[3,5], "info1":{"city":"Los Angeles"}, "info2":{"state":"California"}, "info3":{"company":{"job":"Developer"}}} 3 | {"name":"Justin", "age":19, "ids":[2,4], "info1":{"city":"Seattle"}, "info2":{"state":"Washington"}, "info3":{"company":{"job":"Developer"}}} 4 | -------------------------------------------------------------------------------- /eng/Version.Details.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | https://github.com/dotnet/arcade 8 | db87887481d4110c09a1004191002482fdd7e4f2 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Microsoft.Spark.Extensions.Hyperspace.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0;netstandard2.1 5 | true 6 | true 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.UnitTest/Microsoft.Spark.UnitTest.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net8.0 5 | Microsoft.Spark.UnitTest 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /eng/common/templates/steps/add-build-to-channel.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | ChannelId: 0 3 | 4 | steps: 5 | - task: PowerShell@2 6 | displayName: Add Build to Channel 7 | inputs: 8 | filePath: $(Build.SourcesDirectory)/eng/common/post-build/add-build-to-channel.ps1 9 | arguments: -BuildId $(BARBuildId) 10 | -ChannelId ${{ parameters.ChannelId }} 11 | -MaestroApiAccessToken $(MaestroApiAccessToken) 12 | -MaestroApiEndPoint $(MaestroApiEndPoint) 13 | -MaestroApiVersion $(MaestroApiVersion) 14 | -------------------------------------------------------------------------------- /eng/common/templates-official/steps/add-build-to-channel.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | ChannelId: 0 3 | 4 | steps: 5 | - task: PowerShell@2 6 | displayName: Add Build to Channel 7 | inputs: 8 | filePath: $(Build.SourcesDirectory)/eng/common/post-build/add-build-to-channel.ps1 9 | arguments: -BuildId $(BARBuildId) 10 | -ChannelId ${{ parameters.ChannelId }} 11 | -MaestroApiAccessToken $(MaestroApiAccessToken) 12 | -MaestroApiEndPoint $(MaestroApiEndPoint) 13 | -MaestroApiVersion $(MaestroApiVersion) 14 | -------------------------------------------------------------------------------- /.config/1espt/PipelineAutobaseliningConfig.yml: -------------------------------------------------------------------------------- 1 | ## DO NOT MODIFY THIS FILE MANUALLY. This is part of auto-baselining from 1ES Pipeline Templates. Go to [https://aka.ms/1espt-autobaselining] for more details. 
2 | 3 | pipelines: 4 | 51729: 5 | retail: 6 | source: 7 | credscan: 8 | lastModifiedDate: 2024-12-12 9 | eslint: 10 | lastModifiedDate: 2024-12-12 11 | psscriptanalyzer: 12 | lastModifiedDate: 2024-12-12 13 | armory: 14 | lastModifiedDate: 2024-12-12 15 | -------------------------------------------------------------------------------- /eng/common/templates/post-build/trigger-subscription.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | ChannelId: 0 3 | 4 | steps: 5 | - task: PowerShell@2 6 | displayName: Triggering subscriptions 7 | inputs: 8 | filePath: $(Build.SourcesDirectory)/eng/common/post-build/trigger-subscriptions.ps1 9 | arguments: -SourceRepo $(Build.Repository.Uri) 10 | -ChannelId ${{ parameters.ChannelId }} 11 | -MaestroApiAccessToken $(MaestroAccessToken) 12 | -MaestroApiEndPoint $(MaestroApiEndPoint) 13 | -MaestroApiVersion $(MaestroApiVersion) 14 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Interop/Ipc/JvmBridgeFactory.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark.Interop.Ipc 6 | { 7 | internal class JvmBridgeFactory : IJvmBridgeFactory 8 | { 9 | public IJvmBridge Create(int portNumber) 10 | { 11 | return new JvmBridge(portNumber); 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | class DotnetException(message: String, cause: Throwable) 10 | extends Exception(message, cause) { 11 | 12 | def this(message: String) = this(message, null) 13 | } 14 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | class DotnetException(message: String, cause: Throwable) 10 | extends Exception(message, cause) { 11 | 12 | def this(message: String) = this(message, null) 13 | } 14 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | class DotnetException(message: String, cause: Throwable) 10 | extends Exception(message, cause) { 11 | 12 | def this(message: String) = this(message, null) 13 | } 14 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | class DotnetException(message: String, cause: Throwable) 10 | extends Exception(message, cause) { 11 | 12 | def this(message: String) = this(message, null) 13 | } 14 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | class DotnetException(message: String, cause: Throwable) 10 | extends Exception(message, cause) { 11 | 12 | def this(message: String) = this(message, null) 13 | } 14 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-4/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | class DotnetException(message: String, cause: Throwable) 10 | extends Exception(message, cause) { 11 | 12 | def this(message: String) = this(message, null) 13 | } 14 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-5/src/main/scala/org/apache/spark/api/dotnet/DotnetException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | class DotnetException(message: String, cause: Throwable) 10 | extends Exception(message, cause) { 11 | 12 | def this(message: String) = this(message, null) 13 | } 14 | -------------------------------------------------------------------------------- /eng/common/cross/armel/tizen/tizen.patch: -------------------------------------------------------------------------------- 1 | diff -u -r a/usr/lib/libc.so b/usr/lib/libc.so 2 | --- a/usr/lib/libc.so 2016-12-30 23:00:08.284951863 +0900 3 | +++ b/usr/lib/libc.so 2016-12-30 23:00:32.140951815 +0900 4 | @@ -2,4 +2,4 @@ 5 | Use the shared library, but some functions are only in 6 | the static library, so try that secondarily. 
*/ 7 | OUTPUT_FORMAT(elf32-littlearm) 8 | -GROUP ( /lib/libc.so.6 /usr/lib/libc_nonshared.a AS_NEEDED ( /lib/ld-linux.so.3 ) ) 9 | +GROUP ( libc.so.6 libc_nonshared.a AS_NEEDED ( ld-linux.so.3 ) ) 10 | -------------------------------------------------------------------------------- /eng/common/PSScriptAnalyzerSettings.psd1: -------------------------------------------------------------------------------- 1 | @{ 2 | IncludeRules=@('PSAvoidUsingCmdletAliases', 3 | 'PSAvoidUsingWMICmdlet', 4 | 'PSAvoidUsingPositionalParameters', 5 | 'PSAvoidUsingInvokeExpression', 6 | 'PSUseDeclaredVarsMoreThanAssignments', 7 | 'PSUseCmdletCorrectly', 8 | 'PSStandardDSCFunctionsInResource', 9 | 'PSUseIdenticalMandatoryParametersForDSC', 10 | 'PSUseIdenticalParametersForDSC') 11 | } -------------------------------------------------------------------------------- /eng/common/templates-official/post-build/trigger-subscription.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | ChannelId: 0 3 | 4 | steps: 5 | - task: PowerShell@2 6 | displayName: Triggering subscriptions 7 | inputs: 8 | filePath: $(Build.SourcesDirectory)/eng/common/post-build/trigger-subscriptions.ps1 9 | arguments: -SourceRepo $(Build.Repository.Uri) 10 | -ChannelId ${{ parameters.ChannelId }} 11 | -MaestroApiAccessToken $(MaestroAccessToken) 12 | -MaestroApiEndPoint $(MaestroApiEndPoint) 13 | -MaestroApiVersion $(MaestroApiVersion) 14 | -------------------------------------------------------------------------------- /eng/common/cross/arm/tizen/tizen.patch: -------------------------------------------------------------------------------- 1 | diff -u -r a/usr/lib/libc.so b/usr/lib/libc.so 2 | --- a/usr/lib/libc.so 2016-12-30 23:00:08.284951863 +0900 3 | +++ b/usr/lib/libc.so 2016-12-30 23:00:32.140951815 +0900 4 | @@ -2,4 +2,4 @@ 5 | Use the shared library, but some functions are only in 6 | the static library, so try that secondarily. */ 7 | OUTPUT_FORMAT(elf32-littlearm) 8 | -GROUP ( /lib/libc.so.6 /usr/lib/libc_nonshared.a AS_NEEDED ( /lib/ld-linux-armhf.so.3 ) ) 9 | +GROUP ( libc.so.6 libc_nonshared.a AS_NEEDED ( ld-linux-armhf.so.3 ) ) 10 | -------------------------------------------------------------------------------- /benchmark/csharp/Tpch/StringExtensions.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 
4 | 5 | using System.Text.RegularExpressions; 6 | 7 | namespace Tpch 8 | { 9 | internal static class StringExtensions 10 | { 11 | internal static string StripMargin(this string s) 12 | { 13 | return Regex.Replace(s, @"[ \t]+\|", string.Empty); 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /eng/common/cross/arm64/sources.list.buster: -------------------------------------------------------------------------------- 1 | deb http://deb.debian.org/debian buster main 2 | deb-src http://deb.debian.org/debian buster main 3 | 4 | deb http://deb.debian.org/debian-security/ buster/updates main 5 | deb-src http://deb.debian.org/debian-security/ buster/updates main 6 | 7 | deb http://deb.debian.org/debian buster-updates main 8 | deb-src http://deb.debian.org/debian buster-updates main 9 | 10 | deb http://deb.debian.org/debian buster-backports main contrib non-free 11 | deb-src http://deb.debian.org/debian buster-backports main contrib non-free 12 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/ML/Util/Identifiable.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark.ML.Feature 6 | { 7 | public interface Identifiable 8 | { 9 | /// 10 | /// The UID of the object. 11 | /// 12 | /// string UID identifying the object 13 | string Uid(); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /eng/common/cross/arm64/tizen/tizen.patch: -------------------------------------------------------------------------------- 1 | diff -u -r a/usr/lib/libc.so b/usr/lib/libc.so 2 | --- a/usr/lib64/libc.so 2016-12-30 23:00:08.284951863 +0900 3 | +++ b/usr/lib64/libc.so 2016-12-30 23:00:32.140951815 +0900 4 | @@ -2,4 +2,4 @@ 5 | Use the shared library, but some functions are only in 6 | the static library, so try that secondarily. */ 7 | OUTPUT_FORMAT(elf64-littleaarch64) 8 | -GROUP ( /lib64/libc.so.6 /usr/lib64/libc_nonshared.a AS_NEEDED ( /lib/ld-linux-aarch64.so.1 ) ) 9 | +GROUP ( libc.so.6 libc_nonshared.a AS_NEEDED ( ld-linux-aarch64.so.1 ) ) 10 | -------------------------------------------------------------------------------- /eng/common/cross/arm64/sources.list.stretch: -------------------------------------------------------------------------------- 1 | deb http://deb.debian.org/debian stretch main 2 | deb-src http://deb.debian.org/debian stretch main 3 | 4 | deb http://deb.debian.org/debian-security/ stretch/updates main 5 | deb-src http://deb.debian.org/debian-security/ stretch/updates main 6 | 7 | deb http://deb.debian.org/debian stretch-updates main 8 | deb-src http://deb.debian.org/debian stretch-updates main 9 | 10 | deb http://deb.debian.org/debian stretch-backports main contrib non-free 11 | deb-src http://deb.debian.org/debian stretch-backports main contrib non-free 12 | 13 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/Constants.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 
3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark.Extensions.Delta.E2ETest 6 | { 7 | /// 8 | /// Constants related to the Delta test suite. 9 | /// 10 | internal class Constants 11 | { 12 | public const string DeltaTestContainerName = "Delta Tests"; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Interop/Ipc/ICallbackHandler.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System.IO; 6 | 7 | namespace Microsoft.Spark.Interop.Ipc 8 | { 9 | /// 10 | /// Interface for handling callbacks between the JVM and Dotnet. 11 | /// 12 | internal interface ICallbackHandler 13 | { 14 | void Run(Stream inputStream); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/Microsoft.Spark.Extensions.Delta.E2ETest.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net8.0 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/HyperspaceVersions.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark.Extensions.Hyperspace 6 | { 7 | internal static class HyperspaceVersions 8 | { 9 | internal const string V0_0_1 = "0.0.1"; 10 | internal const string V0_0_3 = "0.0.3"; 11 | internal const string V0_0_4 = "0.0.4"; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /benchmark/run_scala_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | COLD_ITERATION=$1 4 | NUM_EXECUTORS=$2 5 | DRIVER_MEMORY=$3 6 | EXECUTOR_MEMORY=$4 7 | EXECUTOR_CORES=$5 8 | JAR_PATH=$6 9 | DATA_PATH=$7 10 | NUM_ITERATION=$8 11 | IS_SQL=$9 12 | 13 | for i in {1..22} 14 | do 15 | for j in $(seq 1 $COLD_ITERATION) 16 | do 17 | $SPARK_HOME/bin/spark-submit --master yarn --num-executors $NUM_EXECUTORS --driver-memory $DRIVER_MEMORY --executor-memory $EXECUTOR_MEMORY --executor-cores $EXECUTOR_CORES --class com.microsoft.tpch.App $JAR_PATH $DATA_PATH $i $NUM_ITERATION $IS_SQL 18 | done 19 | done 20 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Constants.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest 6 | { 7 | /// 8 | /// Constants related to the Hyperspace test suite. 
9 | /// 10 | internal class Constants 11 | { 12 | public const string HyperspaceTestContainerName = "Hyperspace Tests"; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net8.0 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /eng/common/cibuild.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | source="${BASH_SOURCE[0]}" 4 | 5 | # resolve $SOURCE until the file is no longer a symlink 6 | while [[ -h $source ]]; do 7 | scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" 8 | source="$(readlink "$source")" 9 | 10 | # if $source was a relative symlink, we need to resolve it relative to the path where 11 | # the symlink file was located 12 | [[ $source != /* ]] && source="$scriptroot/$source" 13 | done 14 | scriptroot="$( cd -P "$( dirname "$source" )" && pwd )" 15 | 16 | . "$scriptroot/build.sh" --restore --build --test --pack --publish --ci $@ -------------------------------------------------------------------------------- /eng/common/templates/steps/build-reason.yml: -------------------------------------------------------------------------------- 1 | # build-reason.yml 2 | # Description: runs steps if build.reason condition is valid. conditions is a string of valid build reasons 3 | # to include steps (',' separated). 4 | parameters: 5 | conditions: '' 6 | steps: [] 7 | 8 | steps: 9 | - ${{ if and( not(startsWith(parameters.conditions, 'not')), contains(parameters.conditions, variables['build.reason'])) }}: 10 | - ${{ parameters.steps }} 11 | - ${{ if and( startsWith(parameters.conditions, 'not'), not(contains(parameters.conditions, variables['build.reason']))) }}: 12 | - ${{ parameters.steps }} 13 | -------------------------------------------------------------------------------- /eng/Publishing.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | PackageWorker;$(PublishDependsOnTargets) 4 | 5 | 6 | 7 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /eng/common/templates-official/steps/build-reason.yml: -------------------------------------------------------------------------------- 1 | # build-reason.yml 2 | # Description: runs steps if build.reason condition is valid. conditions is a string of valid build reasons 3 | # to include steps (',' separated). 
4 | parameters: 5 | conditions: '' 6 | steps: [] 7 | 8 | steps: 9 | - ${{ if and( not(startsWith(parameters.conditions, 'not')), contains(parameters.conditions, variables['build.reason'])) }}: 10 | - ${{ parameters.steps }} 11 | - ${{ if and( startsWith(parameters.conditions, 'not'), not(contains(parameters.conditions, variables['build.reason']))) }}: 12 | - ${{ parameters.steps }} 13 | -------------------------------------------------------------------------------- /benchmark/run_python_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | COLD_ITERATION=$1 4 | NUM_EXECUTORS=$2 5 | DRIVER_MEMORY=$3 6 | EXECUTOR_MEMORY=$4 7 | EXECUTOR_CORES=$5 8 | PYTHON_SCRIPT=$6 9 | DATA_PATH=$7 10 | NUM_ITERATION=$8 11 | IS_SQL=$9 12 | 13 | for i in {1..22} #1a 8a ----> Remove the comment to run Arrow based queries 14 | do 15 | for j in $(seq 1 $COLD_ITERATION) 16 | do 17 | $SPARK_HOME/bin/spark-submit --master yarn --num-executors $NUM_EXECUTORS --driver-memory $DRIVER_MEMORY --executor-memory $EXECUTOR_MEMORY --executor-cores $EXECUTOR_CORES $PYTHON_SCRIPT $DATA_PATH $i $NUM_ITERATION $IS_SQL 18 | done 19 | done 20 | -------------------------------------------------------------------------------- /eng/common/templates/steps/component-governance.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | disableComponentGovernance: false 3 | componentGovernanceIgnoreDirectories: '' 4 | 5 | steps: 6 | - ${{ if eq(parameters.disableComponentGovernance, 'true') }}: 7 | - script: echo "##vso[task.setvariable variable=skipComponentGovernanceDetection]true" 8 | displayName: Set skipComponentGovernanceDetection variable 9 | - ${{ if ne(parameters.disableComponentGovernance, 'true') }}: 10 | - task: ComponentGovernanceComponentDetection@0 11 | continueOnError: true 12 | inputs: 13 | ignoreDirectories: ${{ parameters.componentGovernanceIgnoreDirectories }} -------------------------------------------------------------------------------- /eng/common/templates-official/steps/component-governance.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | disableComponentGovernance: false 3 | componentGovernanceIgnoreDirectories: '' 4 | 5 | steps: 6 | - ${{ if eq(parameters.disableComponentGovernance, 'true') }}: 7 | - script: echo "##vso[task.setvariable variable=skipComponentGovernanceDetection]true" 8 | displayName: Set skipComponentGovernanceDetection variable 9 | - ${{ if ne(parameters.disableComponentGovernance, 'true') }}: 10 | - task: ComponentGovernanceComponentDetection@0 11 | continueOnError: true 12 | inputs: 13 | ignoreDirectories: ${{ parameters.componentGovernanceIgnoreDirectories }} -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/JvmException.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | 7 | namespace Microsoft.Spark 8 | { 9 | /// 10 | /// Contains the message returned from the on an error. 
11 | /// 12 | public class JvmException : Exception 13 | { 14 | public JvmException(string message) 15 | : base(message) 16 | { 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /NuGet.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Utils/CollectionUtils.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | using System.Linq; 7 | 8 | namespace Microsoft.Spark.Utils 9 | { 10 | internal static class CollectionUtils 11 | { 12 | internal static bool ArrayEquals(T[] array1, T[] array2) 13 | { 14 | return (array1?.Length == array2?.Length) && 15 | ((array1 == null) || array1.SequenceEqual(array2)); 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /eng/common/enable-cross-org-publishing.ps1: -------------------------------------------------------------------------------- 1 | param( 2 | [string] $token 3 | ) 4 | 5 | 6 | . $PSScriptRoot\pipeline-logging-functions.ps1 7 | 8 | # Write-PipelineSetVariable will no-op if a variable named $ci is not defined 9 | # Since this script is only ever called in AzDO builds, just universally set it 10 | $ci = $true 11 | 12 | Write-PipelineSetVariable -Name 'VSS_NUGET_ACCESSTOKEN' -Value $token -IsMultiJobVariable $false 13 | Write-PipelineSetVariable -Name 'VSS_NUGET_URI_PREFIXES' -Value 'https://dnceng.pkgs.visualstudio.com/;https://pkgs.dev.azure.com/dnceng/;https://devdiv.pkgs.visualstudio.com/;https://pkgs.dev.azure.com/devdiv/' -IsMultiJobVariable $false 14 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.Worker/Utils/DateTimeExtension.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | 7 | namespace Microsoft.Spark.Worker.Utils 8 | { 9 | internal static class DateTimeExtension 10 | { 11 | internal static long ToUnixTime(this DateTime dt) 12 | { 13 | var unixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); 14 | return (long)(dt - unixTimeEpoch).TotalMilliseconds; 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/test/scala/org/apache/spark/api/dotnet/Extensions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
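The two internal helpers listed above, CollectionUtils.ArrayEquals and DateTimeExtension.ToUnixTime, are easiest to read from their edge cases. The sketch below is illustrative only: it assumes ArrayEquals is generic over the element type (the type parameter does not survive this listing) and that the snippet is compiled somewhere these internal types are visible, since they live in different assemblies.

using System;
using Microsoft.Spark.Utils;
using Microsoft.Spark.Worker.Utils;

internal static class UtilityExamples
{
    internal static void Demo()
    {
        // ArrayEquals compares lengths first, then elements; two null arrays compare equal.
        bool equal    = CollectionUtils.ArrayEquals(new[] { 1, 2, 3 }, new[] { 1, 2, 3 }); // true
        bool bothNull = CollectionUtils.ArrayEquals<int>(null, null);                      // true
        bool mixed    = CollectionUtils.ArrayEquals(new[] { 1 }, null);                    // false

        // ToUnixTime reports milliseconds elapsed since the Unix epoch (UTC).
        long ms = new DateTime(1970, 1, 1, 0, 0, 1, DateTimeKind.Utc).ToUnixTime();        // 1000
    }
}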
5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import java.io.DataInputStream 10 | 11 | private[dotnet] object Extensions { 12 | implicit class DataInputStreamExt(stream: DataInputStream) { 13 | def readNBytes(n: Int): Array[Byte] = { 14 | val buf = new Array[Byte](n) 15 | stream.readFully(buf) 16 | buf 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /eng/Signing.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /eng/common/sdl/NuGet.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/test/scala/org/apache/spark/api/dotnet/Extensions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import java.io.DataInputStream 11 | 12 | private[dotnet] object Extensions { 13 | implicit class DataInputStreamExt(stream: DataInputStream) { 14 | def readNBytes(n: Int): Array[Byte] = { 15 | val buf = new Array[Byte](n) 16 | stream.readFully(buf) 17 | buf 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/test/scala/org/apache/spark/api/dotnet/Extensions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import java.io.DataInputStream 11 | 12 | private[dotnet] object Extensions { 13 | implicit class DataInputStreamExt(stream: DataInputStream) { 14 | def readNBytes(n: Int): Array[Byte] = { 15 | val buf = new Array[Byte](n) 16 | stream.readFully(buf) 17 | buf 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/test/scala/org/apache/spark/api/dotnet/Extensions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import java.io.DataInputStream 11 | 12 | private[dotnet] object Extensions { 13 | implicit class DataInputStreamExt(stream: DataInputStream) { 14 | def readNBytes(n: Int): Array[Byte] = { 15 | val buf = new Array[Byte](n) 16 | stream.readFully(buf) 17 | buf 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/test/scala/org/apache/spark/api/dotnet/Extensions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import java.io.DataInputStream 11 | 12 | private[dotnet] object Extensions { 13 | implicit class DataInputStreamExt(stream: DataInputStream) { 14 | def readNBytes(n: Int): Array[Byte] = { 15 | val buf = new Array[Byte](n) 16 | stream.readFully(buf) 17 | buf 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-4/src/test/scala/org/apache/spark/api/dotnet/Extensions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import java.io.DataInputStream 11 | 12 | private[dotnet] object Extensions { 13 | implicit class DataInputStreamExt(stream: DataInputStream) { 14 | def readNBytes(n: Int): Array[Byte] = { 15 | val buf = new Array[Byte](n) 16 | stream.readFully(buf) 17 | buf 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-5/src/test/scala/org/apache/spark/api/dotnet/Extensions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import java.io.DataInputStream 11 | 12 | private[dotnet] object Extensions { 13 | implicit class DataInputStreamExt(stream: DataInputStream) { 14 | def readNBytes(n: Int): Array[Byte] = { 15 | val buf = new Array[Byte](n) 16 | stream.readFully(buf) 17 | buf 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkFilesTests.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 
4 | 5 | using Xunit; 6 | 7 | namespace Microsoft.Spark.E2ETest.IpcTests 8 | { 9 | [Collection("Spark E2E Tests")] 10 | public class SparkFilesTests 11 | { 12 | [Fact] 13 | public void TestSparkFiles() 14 | { 15 | Assert.IsType(SparkFiles.Get("people.json")); 16 | Assert.IsType(SparkFiles.GetRootDirectory()); 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | We are excited to review your PR. 2 | 3 | So we can do the best job, please check: 4 | 5 | - [ ] There's a descriptive title that will make sense to other developers some time from now. 6 | - [ ] There's associated issues. All PR's should have issue(s) associated - unless a trivial self-evident change such as fixing a typo. You can use the format `Fixes #nnnn` in your description to cause GitHub to automatically close the issue(s) when your PR is merged. 7 | - [ ] Your change description explains what the change does, why you chose your approach, and anything else that reviewers should know. 8 | - [ ] You have included any necessary tests in the same PR. 9 | 10 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Sql/Streaming/StreamingQueryException.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using Microsoft.Spark.Interop.Ipc; 6 | 7 | namespace Microsoft.Spark.Sql.Streaming 8 | { 9 | /// 10 | /// Exception that stopped a . 11 | /// 12 | public class StreamingQueryException : JvmException 13 | { 14 | public StreamingQueryException(string message) 15 | : base(message) 16 | { 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/ResolvedNugetPackage.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System.IO; 6 | using Microsoft.DotNet.Interactive; 7 | 8 | namespace Microsoft.Spark.Extensions.DotNet.Interactive 9 | { 10 | internal class ResolvedNuGetPackage 11 | { 12 | public ResolvedPackageReference ResolvedPackage { get; set; } 13 | public DirectoryInfo PackageRootDirectory { get; set; } 14 | public FileInfo NuGetFile { get; set; } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /eng/common/msbuild.ps1: -------------------------------------------------------------------------------- 1 | [CmdletBinding(PositionalBinding=$false)] 2 | Param( 3 | [string] $verbosity = 'minimal', 4 | [bool] $warnAsError = $true, 5 | [bool] $nodeReuse = $true, 6 | [switch] $ci, 7 | [switch] $prepareMachine, 8 | [switch] $excludePrereleaseVS, 9 | [string] $msbuildEngine = $null, 10 | [Parameter(ValueFromRemainingArguments=$true)][String[]]$extraArgs 11 | ) 12 | 13 | . 
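StreamingQueryException above carries a JVM-side error message through the JvmException base class. The snippet below is a hypothetical consumption pattern: it assumes a failed query surfaces this exception type from AwaitTermination, which is how the underlying Spark API behaves; the helper name is illustrative.

using System;
using Microsoft.Spark.Sql.Streaming;

internal static class StreamingErrorHandlingExample
{
    // 'query' is assumed to be a StreamingQuery returned by DataStreamWriter.Start().
    internal static void AwaitSafely(StreamingQuery query)
    {
        try
        {
            query.AwaitTermination();
        }
        catch (StreamingQueryException e)
        {
            // The exception message is the error text propagated from the JVM side.
            Console.WriteLine($"Streaming query failed: {e.Message}");
        }
    }
}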
$PSScriptRoot\tools.ps1 14 | 15 | try { 16 | if ($ci) { 17 | $nodeReuse = $false 18 | } 19 | 20 | MSBuild @extraArgs 21 | } 22 | catch { 23 | Write-Host $_.ScriptStackTrace 24 | Write-PipelineTelemetryError -Category 'Build' -Message $_ 25 | ExitWithExitCode 1 26 | } 27 | 28 | ExitWithExitCode 0 -------------------------------------------------------------------------------- /eng/common/cross/arm/sources.list.focal: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ focal main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ focal main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ focal-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ focal-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ focal-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /eng/common/cross/arm/sources.list.jammy: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /eng/common/cross/arm/sources.list.zesty: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ zesty main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ zesty main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ zesty-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ zesty-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ zesty-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ zesty-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ zesty-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ zesty-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /eng/common/cross/arm64/sources.list.focal: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ focal main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ focal main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ focal-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-updates 
main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ focal-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ focal-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ focal-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /eng/common/cross/arm64/sources.list.jammy: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /eng/common/cross/arm64/sources.list.zesty: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ zesty main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ zesty main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ zesty-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ zesty-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ zesty-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ zesty-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ zesty-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ zesty-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | Contributing to dotnet/spark 2 | ====================== 3 | This document describes contribution guidelines. 4 | 5 | Coding Style 6 | ------------ 7 | We intend to bring dotnet/spark into full conformance with the following style guidelines: 8 | * [C# Coding Style](coding-guidelines/csharp-coding-style.md) 9 | * [Scala Coding Style](coding-guidelines/scala-coding-style.md) 10 | 11 | Implementing Spark API functions 12 | -------------------------------- 13 | When you implement one of the Spark API functions that is currently not implemented, you should include comments and unit tests, refer to the [Minimum Requirements for Implementing Spark API Functions](coding-guidelines/new-functions.md) mini-guide. 
14 | 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '[FEATURE REQUEST]: ' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /eng/common/cross/arm/sources.list.bionic: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /eng/common/cross/arm/sources.list.xenial: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /eng/common/cross/arm64/sources.list.bionic: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ 
bionic-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /eng/common/cross/arm64/sources.list.xenial: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /eng/common/cross/s390x/sources.list.bionic: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /eng/common/cross/ppc64le/sources.list.bionic: -------------------------------------------------------------------------------- 1 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic main restricted universe 2 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic main restricted universe 3 | 4 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-updates main restricted universe 5 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-updates main restricted universe 6 | 7 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-backports main restricted 8 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-backports main restricted 9 | 10 | deb http://ports.ubuntu.com/ubuntu-ports/ bionic-security main restricted universe multiverse 11 | deb-src http://ports.ubuntu.com/ubuntu-ports/ bionic-security main restricted universe multiverse 12 | -------------------------------------------------------------------------------- /examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net48;net8.0 6 | net8.0 7 | Microsoft.Spark.Examples 8 | Microsoft.Spark.CSharp.Examples 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /eng/common/templates/post-build/common-variables.yml: 
-------------------------------------------------------------------------------- 1 | variables: 2 | - group: Publish-Build-Assets 3 | 4 | # Whether the build is internal or not 5 | - name: IsInternalBuild 6 | value: ${{ and(ne(variables['System.TeamProject'], 'public'), contains(variables['Build.SourceBranch'], 'internal')) }} 7 | 8 | # Default Maestro++ API Endpoint and API Version 9 | - name: MaestroApiEndPoint 10 | value: "https://maestro.dot.net" 11 | - name: MaestroApiAccessToken 12 | value: $(MaestroAccessToken) 13 | - name: MaestroApiVersion 14 | value: "2020-02-20" 15 | 16 | - name: SourceLinkCLIVersion 17 | value: 3.0.0 18 | - name: SymbolToolVersion 19 | value: 1.0.1 20 | 21 | - name: runCodesignValidationInjection 22 | value: false 23 | -------------------------------------------------------------------------------- /examples/Microsoft.Spark.CSharp.Examples/Sql/README.md: -------------------------------------------------------------------------------- 1 | # .NET for Apache Spark C# Samples: SQL 2 | 3 | [.NET for Apache Spark](https://dot.net/spark) is a free, open-source, and cross-platform big data analytics framework. 4 | 5 | In the **Sql** folder, we provide samples focusing on Spark SQL, which allows us to work with structured data. We can store and analyze data using 6 | the `DataFrame` API and SQL queries. 7 | 8 | There are two categories of .NET for Apache Spark Sql samples: 9 | 10 | * **[Batch](Batch):** .NET for Apache Spark apps that analyze batch data, or data that has already been produced/stored. 11 | 12 | * **[Streaming](Streaming):** .NET for Apache Spark apps that analyze structured streaming data, or data that is currently being produced live. 13 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Hadoop/Conf/Configuration.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using Microsoft.Spark.Interop.Ipc; 6 | 7 | namespace Microsoft.Spark.Hadoop.Conf 8 | { 9 | /// 10 | /// Provides access to configuration parameters. 11 | /// 12 | public class Configuration : IJvmObjectReferenceProvider 13 | { 14 | internal Configuration(JvmObjectReference jvmObject) 15 | { 16 | Reference = jvmObject; 17 | } 18 | 19 | public JvmObjectReference Reference { get; private set; } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Versions.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 
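Hadoop/Conf/Configuration.cs above illustrates the thin-wrapper pattern used throughout the C# bindings: a class holds a JvmObjectReference and exposes it via IJvmObjectReferenceProvider, and members forward calls to the JVM object. The sketch below extends that pattern with a hypothetical Set/Get pair; these members are assumptions for illustration, not part of the file above, though they mirror Hadoop's own Configuration API.

using Microsoft.Spark.Interop.Ipc;

namespace Microsoft.Spark.Hadoop.Conf
{
    // Hypothetical sketch: same wrapper pattern as Configuration above, with two
    // forwarding members added for illustration.
    public class ConfigurationSketch : IJvmObjectReferenceProvider
    {
        internal ConfigurationSketch(JvmObjectReference jvmObject) => Reference = jvmObject;

        public JvmObjectReference Reference { get; private set; }

        // Forwards to org.apache.hadoop.conf.Configuration.set(String, String) on the JVM.
        public void Set(string name, string value) => Reference.Invoke("set", name, value);

        // Forwards to org.apache.hadoop.conf.Configuration.get(String) on the JVM.
        public string Get(string name) => (string)Reference.Invoke("get", name);
    }
}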
4 | 5 | namespace Microsoft.Spark 6 | { 7 | internal static class Versions 8 | { 9 | internal const string V2_4_0 = "2.4.0"; 10 | internal const string V2_4_2 = "2.4.2"; 11 | internal const string V3_0_0 = "3.0.0"; 12 | internal const string V3_1_0 = "3.1.0"; 13 | internal const string V3_1_1 = "3.1.1"; 14 | internal const string V3_2_0 = "3.2.0"; 15 | internal const string V3_3_0 = "3.3.0"; 16 | internal const string V3_5_1 = "3.5.1"; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /benchmark/run_csharp_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | COLD_ITERATION=$1 4 | NUM_EXECUTORS=$2 5 | DRIVER_MEMORY=$3 6 | EXECUTOR_MEMORY=$4 7 | EXECUTOR_CORES=$5 8 | CSHARP_DLL=$6 9 | JAR_PATH=$7 10 | CSHARP_EXECUTABLE=$8 11 | DATA_PATH=$9 12 | NUM_ITERATION=${10} 13 | IS_SQL=${11} 14 | 15 | for i in {1..22} #1a 8a 1ha 8ha ----> Remove the comment to run Arrow and hardware acceleration based queries 16 | do 17 | for j in $(seq 1 $COLD_ITERATION) 18 | do 19 | $SPARK_HOME/bin/spark-submit --master yarn --num-executors $NUM_EXECUTORS --driver-memory $DRIVER_MEMORY --executor-memory $EXECUTOR_MEMORY --executor-cores $EXECUTOR_CORES --files $CSHARP_DLL --class org.apache.spark.deploy.dotnet.DotnetRunner $JAR_PATH $CSHARP_EXECUTABLE $DATA_PATH $i $NUM_ITERATION $IS_SQL 20 | done 21 | done 22 | -------------------------------------------------------------------------------- /eng/common/templates-official/post-build/common-variables.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | - group: Publish-Build-Assets 3 | 4 | # Whether the build is internal or not 5 | - name: IsInternalBuild 6 | value: ${{ and(ne(variables['System.TeamProject'], 'public'), contains(variables['Build.SourceBranch'], 'internal')) }} 7 | 8 | # Default Maestro++ API Endpoint and API Version 9 | - name: MaestroApiEndPoint 10 | value: "https://maestro-prod.westus2.cloudapp.azure.com" 11 | - name: MaestroApiAccessToken 12 | value: $(MaestroAccessToken) 13 | - name: MaestroApiVersion 14 | value: "2020-02-20" 15 | 16 | - name: SourceLinkCLIVersion 17 | value: 3.0.0 18 | - name: SymbolToolVersion 19 | value: 1.0.1 20 | 21 | - name: runCodesignValidationInjection 22 | value: false 23 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.Worker.UnitTest/Microsoft.Spark.Worker.UnitTest.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net8.0 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest.ExternalLibrary/ExternalClass.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 
4 | 5 | using System; 6 | 7 | namespace Microsoft.Spark.E2ETest.ExternalLibrary 8 | { 9 | [Serializable] 10 | public class ExternalClass 11 | { 12 | private string s; 13 | 14 | public ExternalClass(string s) 15 | { 16 | this.s = s; 17 | } 18 | 19 | public static string HelloWorld() 20 | { 21 | return "Hello World"; 22 | } 23 | 24 | public string Concat(string s) 25 | { 26 | return this.s + s; 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /eng/PackageWorker.proj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 8 | 10 | 11 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # .NET for Apache Spark Samples 2 | 3 | [.NET for Apache Spark](https://dot.net/spark) is a free, open-source, and cross-platform big data analytics framework. 4 | 5 | In the **examples** folder, we provide samples which will help you get started with .NET for Apache Spark 6 | and demonstrate how to infuse big data analytics into existing and new .NET apps. 7 | 8 | There are two broad categories of .NET for Apache Spark samples: 9 | 10 | * **[Microsoft.Spark.CSharp.Examples](Microsoft.Spark.CSharp.Examples):** Sample C# .NET for Apache Spark apps. 11 | 12 | * **[Microsoft.Spark.FSharp.Examples](Microsoft.Spark.FSharp.Examples):** Sample F# .NET for Apache Spark apps. 13 | 14 | **Note:** The samples in each of these folders fall under additional sub-categories, such as batch, streaming, and machine learning. 15 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.Delta/Microsoft.Spark.Extensions.Delta.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0;netstandard2.1 5 | Microsoft.Spark.Extensions.Delta 6 | true 7 | true 8 | 9 | Delta Extension for .NET for Apache Spark 10 | https://github.com/dotnet/spark/tree/master/docs/release-notes 11 | spark;dotnet;csharp;delta;delta lake 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.Worker/Utils/SettingUtils.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | using static System.Environment; 7 | 8 | namespace Microsoft.Spark.Worker.Utils 9 | { 10 | /// 11 | /// Provides functionalities to retrieve various settings. 12 | /// 13 | internal static class SettingUtils 14 | { 15 | internal static string GetWorkerFactorySecret() => 16 | GetEnvironmentVariable("PYTHON_WORKER_FACTORY_SECRET"); 17 | 18 | internal static int GetWorkerFactoryPort() => 19 | int.Parse(GetEnvironmentVariable("PYTHON_WORKER_FACTORY_PORT").Trim()); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Network/SocketFactory.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 
3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark.Network 6 | { 7 | /// 8 | /// SocketFactory is used to create ISocketWrapper instance. 9 | /// 10 | internal static class SocketFactory 11 | { 12 | /// 13 | /// Creates an ISocket instance based on the socket type set. 14 | /// 15 | /// 16 | /// ISocketWrapper instance. 17 | /// 18 | public static ISocketWrapper CreateSocket() 19 | { 20 | return new DefaultSocketWrapper(); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/csharp/Extensions/README.md: -------------------------------------------------------------------------------- 1 | # .Net for Apache Spark Extensions 2 | 3 | ## Table of Contents 4 | * [NuGet Packages](#nuget-packages) 5 | 6 | ## NuGet Packages 7 | 8 | The following .Net for Apache Spark extensions are available as NuGet packages: 9 | 10 | ### First-Party 11 | 12 | * [Microsoft.Spark.Extensions.Azure.Synapse.Analytics](https://www.nuget.org/packages/Microsoft.Spark.Extensions.Azure.Synapse.Analytics/) 13 | * [Microsoft.Spark.Extensions.Delta](https://www.nuget.org/packages/Microsoft.Spark.Extensions.Delta/) 14 | * [Microsoft.Spark.Extensions.DotNet.Interactive](https://www.nuget.org/packages/Microsoft.Spark.Extensions.DotNet.Interactive/) 15 | * [Microsoft.Spark.Extensions.Hyperspace](https://www.nuget.org/packages/Microsoft.Spark.Extensions.Hyperspace/) 16 | 17 | ### Third-Party 18 | 19 | * Community-created extensions can be added here. -------------------------------------------------------------------------------- /benchmark/python/tpch_base.py: -------------------------------------------------------------------------------- 1 | # Licensed to the .NET Foundation under one or more agreements. 2 | # The .NET Foundation licenses this file to you under the MIT license. 3 | # See the LICENSE file in the project root for more information. 
4 | 5 | import pyspark 6 | from pyspark.sql import SparkSession 7 | 8 | 9 | class TpchBase: 10 | def __init__(self, spark, dir): 11 | self.customer = spark.read.parquet(dir + "customer") 12 | self.lineitem = spark.read.parquet(dir + "lineitem") 13 | self.nation = spark.read.parquet(dir + "nation") 14 | self.region = spark.read.parquet(dir + "region") 15 | self.orders = spark.read.parquet(dir + "orders") 16 | self.part = spark.read.parquet(dir + "part") 17 | self.partsupp = spark.read.parquet(dir + "partsupp") 18 | self.supplier = spark.read.parquet(dir + "supplier") 19 | -------------------------------------------------------------------------------- /eng/common/templates/steps/publish-logs.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | StageLabel: '' 3 | JobLabel: '' 4 | 5 | steps: 6 | - task: Powershell@2 7 | displayName: Prepare Binlogs to Upload 8 | inputs: 9 | targetType: inline 10 | script: | 11 | New-Item -ItemType Directory $(Build.SourcesDirectory)/PostBuildLogs/${{parameters.StageLabel}}/${{parameters.JobLabel}}/ 12 | Move-Item -Path $(Build.SourcesDirectory)/artifacts/log/Debug/* $(Build.SourcesDirectory)/PostBuildLogs/${{parameters.StageLabel}}/${{parameters.JobLabel}}/ 13 | continueOnError: true 14 | condition: always() 15 | 16 | - task: PublishBuildArtifacts@1 17 | displayName: Publish Logs 18 | inputs: 19 | PathtoPublish: '$(Build.SourcesDirectory)/PostBuildLogs' 20 | PublishLocation: Container 21 | ArtifactName: PostBuildLogs 22 | continueOnError: true 23 | condition: always() 24 | -------------------------------------------------------------------------------- /eng/common/generate-sbom-prep.ps1: -------------------------------------------------------------------------------- 1 | Param( 2 | [Parameter(Mandatory=$true)][string] $ManifestDirPath # Manifest directory where sbom will be placed 3 | ) 4 | 5 | . $PSScriptRoot\pipeline-logging-functions.ps1 6 | 7 | Write-Host "Creating dir $ManifestDirPath" 8 | # create directory for sbom manifest to be placed 9 | if (!(Test-Path -path $ManifestDirPath)) 10 | { 11 | New-Item -ItemType Directory -path $ManifestDirPath 12 | Write-Host "Successfully created directory $ManifestDirPath" 13 | } 14 | else{ 15 | Write-PipelineTelemetryError -category 'Build' "Unable to create sbom folder." 
16 | } 17 | 18 | Write-Host "Updating artifact name" 19 | $artifact_name = "${env:SYSTEM_STAGENAME}_${env:AGENT_JOBNAME}_SBOM" -replace '["/:<>\\|?@*"() ]', '_' 20 | Write-Host "Artifact name $artifact_name" 21 | Write-Host "##vso[task.setvariable variable=ARTIFACT_NAME]$artifact_name" 22 | -------------------------------------------------------------------------------- /eng/common/templates-official/steps/publish-logs.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | StageLabel: '' 3 | JobLabel: '' 4 | 5 | steps: 6 | - task: Powershell@2 7 | displayName: Prepare Binlogs to Upload 8 | inputs: 9 | targetType: inline 10 | script: | 11 | New-Item -ItemType Directory $(Build.SourcesDirectory)/PostBuildLogs/${{parameters.StageLabel}}/${{parameters.JobLabel}}/ 12 | Move-Item -Path $(Build.SourcesDirectory)/artifacts/log/Debug/* $(Build.SourcesDirectory)/PostBuildLogs/${{parameters.StageLabel}}/${{parameters.JobLabel}}/ 13 | continueOnError: true 14 | condition: always() 15 | 16 | - task: 1ES.PublishBuildArtifacts@1 17 | displayName: Publish Logs 18 | inputs: 19 | PathtoPublish: '$(Build.SourcesDirectory)/PostBuildLogs' 20 | PublishLocation: Container 21 | ArtifactName: PostBuildLogs 22 | continueOnError: true 23 | condition: always() 24 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.Delta/DeltaLakeVersions.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | namespace Microsoft.Spark.Extensions.Delta 6 | { 7 | internal static class DeltaLakeVersions 8 | { 9 | internal const string V0_1_0 = "0.1.0"; 10 | internal const string V0_2_0 = "0.2.0"; 11 | internal const string V0_3_0 = "0.3.0"; 12 | internal const string V0_4_0 = "0.4.0"; 13 | internal const string V0_5_0 = "0.5.0"; 14 | internal const string V0_6_0 = "0.6.0"; 15 | internal const string V0_6_1 = "0.6.1"; 16 | internal const string V0_7_0 = "0.7.0"; 17 | internal const string V0_8_0 = "0.8.0"; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | Exe 4 | net48;net8.0 5 | net8.0 6 | Microsoft.Spark.Worker 7 | true 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /eng/common/helixpublish.proj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | msbuild 5 | 6 | 7 | 8 | 9 | %(Identity) 10 | 11 | 12 | 13 | 14 | 15 | $(WorkItemDirectory) 16 | $(WorkItemCommand) 17 | $(WorkItemTimeout) 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /eng/common/dotnet-install.ps1: -------------------------------------------------------------------------------- 1 | [CmdletBinding(PositionalBinding=$false)] 2 | Param( 3 | [string] $verbosity = 'minimal', 4 | [string] $architecture = '', 5 | [string] $version = 'Latest', 6 | [string] $runtime = 'dotnet', 7 | [string] $RuntimeSourceFeed = '', 8 | [string] $RuntimeSourceFeedKey = '' 9 | ) 10 | 11 | . 
$PSScriptRoot\tools.ps1 12 | 13 | $dotnetRoot = Join-Path $RepoRoot '.dotnet' 14 | 15 | $installdir = $dotnetRoot 16 | try { 17 | if ($architecture -and $architecture.Trim() -eq 'x86') { 18 | $installdir = Join-Path $installdir 'x86' 19 | } 20 | InstallDotNet $installdir $version $architecture $runtime $true -RuntimeSourceFeed $RuntimeSourceFeed -RuntimeSourceFeedKey $RuntimeSourceFeedKey 21 | } 22 | catch { 23 | Write-Host $_.ScriptStackTrace 24 | Write-PipelineTelemetryError -Category 'InitializeToolset' -Message $_ 25 | ExitWithExitCode 1 26 | } 27 | 28 | ExitWithExitCode 0 29 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.Delta/Attributes.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | 7 | namespace Microsoft.Spark.Extensions.Delta 8 | { 9 | /// 10 | /// Custom attribute to denote the Delta Lake version in which an API is introduced. 11 | /// 12 | [AttributeUsage(AttributeTargets.All)] 13 | public sealed class DeltaLakeSinceAttribute : VersionAttribute 14 | { 15 | /// 16 | /// Constructor for DeltaLakeSinceAttribute class. 17 | /// 18 | /// Delta Lake version 19 | public DeltaLakeSinceAttribute(string version) 20 | : base(version) 21 | { 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Attributes.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | 7 | namespace Microsoft.Spark.Extensions.Hyperspace 8 | { 9 | /// 10 | /// Custom attribute to denote the Hyperspace version in which an API is introduced. 11 | /// 12 | [AttributeUsage(AttributeTargets.All)] 13 | public sealed class HyperspaceSinceAttribute : VersionAttribute 14 | { 15 | /// 16 | /// Constructor for HyperspaceSinceAttribute class. 
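DeltaLakeSinceAttribute above (and the analogous HyperspaceSinceAttribute that follows) record the extension release in which an API first appeared. A hypothetical application of the attribute, assuming the code sits inside the Delta extension assembly so that the internal DeltaLakeVersions constants shown earlier are visible; the method itself is illustrative.

using Microsoft.Spark.Extensions.Delta;

namespace Microsoft.Spark.Extensions.Delta.Tables
{
    public class DeltaTableSketch
    {
        // The attribute documents the Delta Lake release that introduced the API;
        // DeltaLakeVersions.V0_8_0 is the const string "0.8.0" listed earlier.
        [DeltaLakeSince(DeltaLakeVersions.V0_8_0)]
        public void UpgradeTableProtocol(int readerVersion, int writerVersion)
        {
            // Body omitted; in the real binding this would forward to the JVM object.
        }
    }
}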
17 | /// 18 | /// Hyperspace version 19 | public HyperspaceSinceAttribute(string version) 20 | : base(version) 21 | { 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/scala/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.microsoft.scala 5 | microsoft-spark 6 | pom 7 | ${microsoft-spark.version} 8 | 9 | UTF-8 10 | 2.3.0 11 | 12 | 13 | 14 | microsoft-spark-2-4 15 | microsoft-spark-3-0 16 | microsoft-spark-3-1 17 | microsoft-spark-3-2 18 | microsoft-spark-3-3 19 | microsoft-spark-3-4 20 | microsoft-spark-3-5 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net8.0 5 | Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/main/scala/org/apache/spark/deploy/dotnet/DotNetUserAppException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.deploy.dotnet 8 | 9 | import org.apache.spark.SparkException 10 | 11 | /** 12 | * This exception type describes an exception thrown by a .NET user application. 13 | * 14 | * @param exitCode Exit code returned by the .NET application. 15 | * @param dotNetStackTrace Stacktrace extracted from .NET application logs. 16 | */ 17 | private[spark] class DotNetUserAppException(exitCode: Int, dotNetStackTrace: Option[String]) 18 | extends SparkException( 19 | dotNetStackTrace match { 20 | case None => s"User application exited with $exitCode" 21 | case Some(e) => s"User application exited with $exitCode and .NET exception: $e" 22 | }) 23 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/main/scala/org/apache/spark/deploy/dotnet/DotNetUserAppException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.deploy.dotnet 8 | 9 | import org.apache.spark.SparkException 10 | 11 | /** 12 | * This exception type describes an exception thrown by a .NET user application. 13 | * 14 | * @param exitCode Exit code returned by the .NET application. 15 | * @param dotNetStackTrace Stacktrace extracted from .NET application logs. 
16 | */ 17 | private[spark] class DotNetUserAppException(exitCode: Int, dotNetStackTrace: Option[String]) 18 | extends SparkException( 19 | dotNetStackTrace match { 20 | case None => s"User application exited with $exitCode" 21 | case Some(e) => s"User application exited with $exitCode and .NET exception: $e" 22 | }) 23 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/main/scala/org/apache/spark/deploy/dotnet/DotNetUserAppException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.deploy.dotnet 8 | 9 | import org.apache.spark.SparkException 10 | 11 | /** 12 | * This exception type describes an exception thrown by a .NET user application. 13 | * 14 | * @param exitCode Exit code returned by the .NET application. 15 | * @param dotNetStackTrace Stacktrace extracted from .NET application logs. 16 | */ 17 | private[spark] class DotNetUserAppException(exitCode: Int, dotNetStackTrace: Option[String]) 18 | extends SparkException( 19 | dotNetStackTrace match { 20 | case None => s"User application exited with $exitCode" 21 | case Some(e) => s"User application exited with $exitCode and .NET exception: $e" 22 | }) 23 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/main/scala/org/apache/spark/deploy/dotnet/DotNetUserAppException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.deploy.dotnet 8 | 9 | import org.apache.spark.SparkException 10 | 11 | /** 12 | * This exception type describes an exception thrown by a .NET user application. 13 | * 14 | * @param exitCode Exit code returned by the .NET application. 15 | * @param dotNetStackTrace Stacktrace extracted from .NET application logs. 16 | */ 17 | private[spark] class DotNetUserAppException(exitCode: Int, dotNetStackTrace: Option[String]) 18 | extends SparkException( 19 | dotNetStackTrace match { 20 | case None => s"User application exited with $exitCode" 21 | case Some(e) => s"User application exited with $exitCode and .NET exception: $e" 22 | }) 23 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/main/scala/org/apache/spark/deploy/dotnet/DotNetUserAppException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.deploy.dotnet 8 | 9 | import org.apache.spark.SparkException 10 | 11 | /** 12 | * This exception type describes an exception thrown by a .NET user application. 13 | * 14 | * @param exitCode Exit code returned by the .NET application. 15 | * @param dotNetStackTrace Stacktrace extracted from .NET application logs. 
16 | */ 17 | private[spark] class DotNetUserAppException(exitCode: Int, dotNetStackTrace: Option[String]) 18 | extends SparkException( 19 | dotNetStackTrace match { 20 | case None => s"User application exited with $exitCode" 21 | case Some(e) => s"User application exited with $exitCode and .NET exception: $e" 22 | }) 23 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-4/src/main/scala/org/apache/spark/deploy/dotnet/DotNetUserAppException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.deploy.dotnet 8 | 9 | import org.apache.spark.SparkException 10 | 11 | /** 12 | * This exception type describes an exception thrown by a .NET user application. 13 | * 14 | * @param exitCode Exit code returned by the .NET application. 15 | * @param dotNetStackTrace Stacktrace extracted from .NET application logs. 16 | */ 17 | private[spark] class DotNetUserAppException(exitCode: Int, dotNetStackTrace: Option[String]) 18 | extends SparkException( 19 | dotNetStackTrace match { 20 | case None => s"User application exited with $exitCode" 21 | case Some(e) => s"User application exited with $exitCode and .NET exception: $e" 22 | }) 23 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-5/src/main/scala/org/apache/spark/deploy/dotnet/DotNetUserAppException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.deploy.dotnet 8 | 9 | import org.apache.spark.SparkException 10 | 11 | /** 12 | * This exception type describes an exception thrown by a .NET user application. 13 | * 14 | * @param exitCode Exit code returned by the .NET application. 15 | * @param dotNetStackTrace Stacktrace extracted from .NET application logs. 16 | */ 17 | private[spark] class DotNetUserAppException(exitCode: Int, dotNetStackTrace: Option[String]) 18 | extends SparkException( 19 | dotNetStackTrace match { 20 | case None => s"User application exited with $exitCode" 21 | case Some(e) => s"User application exited with $exitCode and .NET exception: $e" 22 | }) 23 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/main/scala/org/apache/spark/api/dotnet/JvmBridgeUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.SparkConf 10 | 11 | /* 12 | * Utils for JvmBridge. 
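 *
 * For example (illustrative values), a conf containing ("spark.app.name", "demo") and
 * ("spark.master", "local[*]") is rendered by getSparkConfAsString as
 * "spark.app.name=demo;spark.master=local[*];".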
13 | */ 14 | object JvmBridgeUtils { 15 | def getKeyValuePairAsString(kvp: (String, String)): String = { 16 | return kvp._1 + "=" + kvp._2 17 | } 18 | 19 | def getKeyValuePairArrayAsString(kvpArray: Array[(String, String)]): String = { 20 | val sb = new StringBuilder 21 | 22 | for (kvp <- kvpArray) { 23 | sb.append(getKeyValuePairAsString(kvp)) 24 | sb.append(";") 25 | } 26 | 27 | sb.toString 28 | } 29 | 30 | def getSparkConfAsString(sparkConf: SparkConf): String = { 31 | getKeyValuePairArrayAsString(sparkConf.getAll) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/main/scala/org/apache/spark/api/dotnet/JvmBridgeUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.SparkConf 10 | 11 | /* 12 | * Utils for JvmBridge. 13 | */ 14 | object JvmBridgeUtils { 15 | def getKeyValuePairAsString(kvp: (String, String)): String = { 16 | return kvp._1 + "=" + kvp._2 17 | } 18 | 19 | def getKeyValuePairArrayAsString(kvpArray: Array[(String, String)]): String = { 20 | val sb = new StringBuilder 21 | 22 | for (kvp <- kvpArray) { 23 | sb.append(getKeyValuePairAsString(kvp)) 24 | sb.append(";") 25 | } 26 | 27 | sb.toString 28 | } 29 | 30 | def getSparkConfAsString(sparkConf: SparkConf): String = { 31 | getKeyValuePairArrayAsString(sparkConf.getAll) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/main/scala/org/apache/spark/api/dotnet/JvmBridgeUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.SparkConf 10 | 11 | /* 12 | * Utils for JvmBridge. 13 | */ 14 | object JvmBridgeUtils { 15 | def getKeyValuePairAsString(kvp: (String, String)): String = { 16 | return kvp._1 + "=" + kvp._2 17 | } 18 | 19 | def getKeyValuePairArrayAsString(kvpArray: Array[(String, String)]): String = { 20 | val sb = new StringBuilder 21 | 22 | for (kvp <- kvpArray) { 23 | sb.append(getKeyValuePairAsString(kvp)) 24 | sb.append(";") 25 | } 26 | 27 | sb.toString 28 | } 29 | 30 | def getSparkConfAsString(sparkConf: SparkConf): String = { 31 | getKeyValuePairArrayAsString(sparkConf.getAll) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/main/scala/org/apache/spark/api/dotnet/JvmBridgeUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.SparkConf 10 | 11 | /* 12 | * Utils for JvmBridge. 
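 *
 * Note that every key=value pair, including the last one, is terminated by ';'.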
13 | */ 14 | object JvmBridgeUtils { 15 | def getKeyValuePairAsString(kvp: (String, String)): String = { 16 | return kvp._1 + "=" + kvp._2 17 | } 18 | 19 | def getKeyValuePairArrayAsString(kvpArray: Array[(String, String)]): String = { 20 | val sb = new StringBuilder 21 | 22 | for (kvp <- kvpArray) { 23 | sb.append(getKeyValuePairAsString(kvp)) 24 | sb.append(";") 25 | } 26 | 27 | sb.toString 28 | } 29 | 30 | def getSparkConfAsString(sparkConf: SparkConf): String = { 31 | getKeyValuePairArrayAsString(sparkConf.getAll) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/main/scala/org/apache/spark/api/dotnet/JvmBridgeUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.SparkConf 10 | 11 | /* 12 | * Utils for JvmBridge. 13 | */ 14 | object JvmBridgeUtils { 15 | def getKeyValuePairAsString(kvp: (String, String)): String = { 16 | return kvp._1 + "=" + kvp._2 17 | } 18 | 19 | def getKeyValuePairArrayAsString(kvpArray: Array[(String, String)]): String = { 20 | val sb = new StringBuilder 21 | 22 | for (kvp <- kvpArray) { 23 | sb.append(getKeyValuePairAsString(kvp)) 24 | sb.append(";") 25 | } 26 | 27 | sb.toString 28 | } 29 | 30 | def getSparkConfAsString(sparkConf: SparkConf): String = { 31 | getKeyValuePairArrayAsString(sparkConf.getAll) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-4/src/main/scala/org/apache/spark/api/dotnet/JvmBridgeUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.SparkConf 10 | 11 | /* 12 | * Utils for JvmBridge. 13 | */ 14 | object JvmBridgeUtils { 15 | def getKeyValuePairAsString(kvp: (String, String)): String = { 16 | return kvp._1 + "=" + kvp._2 17 | } 18 | 19 | def getKeyValuePairArrayAsString(kvpArray: Array[(String, String)]): String = { 20 | val sb = new StringBuilder 21 | 22 | for (kvp <- kvpArray) { 23 | sb.append(getKeyValuePairAsString(kvp)) 24 | sb.append(";") 25 | } 26 | 27 | sb.toString 28 | } 29 | 30 | def getSparkConfAsString(sparkConf: SparkConf): String = { 31 | getKeyValuePairArrayAsString(sparkConf.getAll) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-5/src/main/scala/org/apache/spark/api/dotnet/JvmBridgeUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.SparkConf 10 | 11 | /* 12 | * Utils for JvmBridge. 
13 | */ 14 | object JvmBridgeUtils { 15 | def getKeyValuePairAsString(kvp: (String, String)): String = { 16 | return kvp._1 + "=" + kvp._2 17 | } 18 | 19 | def getKeyValuePairArrayAsString(kvpArray: Array[(String, String)]): String = { 20 | val sb = new StringBuilder 21 | 22 | for (kvp <- kvpArray) { 23 | sb.append(getKeyValuePairAsString(kvp)) 24 | sb.append(";") 25 | } 26 | 27 | sb.toString 28 | } 29 | 30 | def getSparkConfAsString(sparkConf: SparkConf): String = { 31 | getKeyValuePairArrayAsString(sparkConf.getAll) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /benchmark/scala/src/main/scala/com/microsoft/tpch/TpchBase.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package com.microsoft.tpch 8 | 9 | import org.apache.spark.sql.{DataFrame, SparkSession} 10 | 11 | class TpchBase(spark: SparkSession, tpchRoot: String) { 12 | val customer: DataFrame = spark.read.parquet(s"${tpchRoot}customer") 13 | val lineitem: DataFrame = spark.read.parquet(s"${tpchRoot}lineitem") 14 | val nation: DataFrame = spark.read.parquet(s"${tpchRoot}nation") 15 | val order: DataFrame = spark.read.parquet(s"${tpchRoot}orders") 16 | val part: DataFrame = spark.read.parquet(s"${tpchRoot}part") 17 | val partsupp: DataFrame = spark.read.parquet(s"${tpchRoot}partsupp") 18 | val region: DataFrame = spark.read.parquet(s"${tpchRoot}region") 19 | val supplier: DataFrame = spark.read.parquet(s"${tpchRoot}supplier") 20 | } 21 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/main/scala/org/apache/spark/mllib/api/dotnet/MLUtils.scala: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Licensed to the .NET Foundation under one or more agreements. 4 | * The .NET Foundation licenses this file to you under the MIT license. 5 | * See the LICENSE file in the project root for more information. 6 | */ 7 | 8 | package org.apache.spark.mllib.api.dotnet 9 | 10 | import org.apache.spark.ml._ 11 | import scala.collection.JavaConverters._ 12 | 13 | /** MLUtils object that hosts helper functions 14 | * related to ML usage 15 | */ 16 | object MLUtils { 17 | 18 | /** A helper function to let pipeline accept java.util.ArrayList 19 | * format stages in scala code 20 | * @param pipeline - The pipeline to be set stages 21 | * @param value - A java.util.ArrayList of PipelineStages to be set as stages 22 | * @return The pipeline 23 | */ 24 | def setPipelineStages(pipeline: Pipeline, value: java.util.ArrayList[_ <: PipelineStage]): Pipeline = 25 | pipeline.setStages(value.asScala.toArray) 26 | } 27 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/main/scala/org/apache/spark/mllib/api/dotnet/MLUtils.scala: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Licensed to the .NET Foundation under one or more agreements. 4 | * The .NET Foundation licenses this file to you under the MIT license. 5 | * See the LICENSE file in the project root for more information. 
6 | */ 7 | 8 | package org.apache.spark.mllib.api.dotnet 9 | 10 | import org.apache.spark.ml._ 11 | import scala.collection.JavaConverters._ 12 | 13 | /** MLUtils object that hosts helper functions 14 | * related to ML usage 15 | */ 16 | object MLUtils { 17 | 18 | /** A helper function to let pipeline accept java.util.ArrayList 19 | * format stages in scala code 20 | * @param pipeline - The pipeline to be set stages 21 | * @param value - A java.util.ArrayList of PipelineStages to be set as stages 22 | * @return The pipeline 23 | */ 24 | def setPipelineStages(pipeline: Pipeline, value: java.util.ArrayList[_ <: PipelineStage]): Pipeline = 25 | pipeline.setStages(value.asScala.toArray) 26 | } 27 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/main/scala/org/apache/spark/mllib/api/dotnet/MLUtils.scala: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Licensed to the .NET Foundation under one or more agreements. 4 | * The .NET Foundation licenses this file to you under the MIT license. 5 | * See the LICENSE file in the project root for more information. 6 | */ 7 | 8 | package org.apache.spark.mllib.api.dotnet 9 | 10 | import org.apache.spark.ml._ 11 | import scala.collection.JavaConverters._ 12 | 13 | /** MLUtils object that hosts helper functions 14 | * related to ML usage 15 | */ 16 | object MLUtils { 17 | 18 | /** A helper function to let pipeline accept java.util.ArrayList 19 | * format stages in scala code 20 | * @param pipeline - The pipeline to be set stages 21 | * @param value - A java.util.ArrayList of PipelineStages to be set as stages 22 | * @return The pipeline 23 | */ 24 | def setPipelineStages(pipeline: Pipeline, value: java.util.ArrayList[_ <: PipelineStage]): Pipeline = 25 | pipeline.setStages(value.asScala.toArray) 26 | } 27 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/main/scala/org/apache/spark/mllib/api/dotnet/MLUtils.scala: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Licensed to the .NET Foundation under one or more agreements. 4 | * The .NET Foundation licenses this file to you under the MIT license. 5 | * See the LICENSE file in the project root for more information. 6 | */ 7 | 8 | package org.apache.spark.mllib.api.dotnet 9 | 10 | import org.apache.spark.ml._ 11 | import scala.collection.JavaConverters._ 12 | 13 | /** MLUtils object that hosts helper functions 14 | * related to ML usage 15 | */ 16 | object MLUtils { 17 | 18 | /** A helper function to let pipeline accept java.util.ArrayList 19 | * format stages in scala code 20 | * @param pipeline - The pipeline to be set stages 21 | * @param value - A java.util.ArrayList of PipelineStages to be set as stages 22 | * @return The pipeline 23 | */ 24 | def setPipelineStages(pipeline: Pipeline, value: java.util.ArrayList[_ <: PipelineStage]): Pipeline = 25 | pipeline.setStages(value.asScala.toArray) 26 | } 27 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/main/scala/org/apache/spark/mllib/api/dotnet/MLUtils.scala: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Licensed to the .NET Foundation under one or more agreements. 4 | * The .NET Foundation licenses this file to you under the MIT license. 5 | * See the LICENSE file in the project root for more information. 
6 | */ 7 | 8 | package org.apache.spark.mllib.api.dotnet 9 | 10 | import org.apache.spark.ml._ 11 | import scala.collection.JavaConverters._ 12 | 13 | /** MLUtils object that hosts helper functions 14 | * related to ML usage 15 | */ 16 | object MLUtils { 17 | 18 | /** A helper function to let pipeline accept java.util.ArrayList 19 | * format stages in scala code 20 | * @param pipeline - The pipeline to be set stages 21 | * @param value - A java.util.ArrayList of PipelineStages to be set as stages 22 | * @return The pipeline 23 | */ 24 | def setPipelineStages(pipeline: Pipeline, value: java.util.ArrayList[_ <: PipelineStage]): Pipeline = 25 | pipeline.setStages(value.asScala.toArray) 26 | } 27 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-4/src/main/scala/org/apache/spark/mllib/api/dotnet/MLUtils.scala: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Licensed to the .NET Foundation under one or more agreements. 4 | * The .NET Foundation licenses this file to you under the MIT license. 5 | * See the LICENSE file in the project root for more information. 6 | */ 7 | 8 | package org.apache.spark.mllib.api.dotnet 9 | 10 | import org.apache.spark.ml._ 11 | import scala.collection.JavaConverters._ 12 | 13 | /** MLUtils object that hosts helper functions 14 | * related to ML usage 15 | */ 16 | object MLUtils { 17 | 18 | /** A helper function to let pipeline accept java.util.ArrayList 19 | * format stages in scala code 20 | * @param pipeline - The pipeline to be set stages 21 | * @param value - A java.util.ArrayList of PipelineStages to be set as stages 22 | * @return The pipeline 23 | */ 24 | def setPipelineStages(pipeline: Pipeline, value: java.util.ArrayList[_ <: PipelineStage]): Pipeline = 25 | pipeline.setStages(value.asScala.toArray) 26 | } 27 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-5/src/main/scala/org/apache/spark/mllib/api/dotnet/MLUtils.scala: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Licensed to the .NET Foundation under one or more agreements. 4 | * The .NET Foundation licenses this file to you under the MIT license. 5 | * See the LICENSE file in the project root for more information. 6 | */ 7 | 8 | package org.apache.spark.mllib.api.dotnet 9 | 10 | import org.apache.spark.ml._ 11 | import scala.collection.JavaConverters._ 12 | 13 | /** MLUtils object that hosts helper functions 14 | * related to ML usage 15 | */ 16 | object MLUtils { 17 | 18 | /** A helper function to let pipeline accept java.util.ArrayList 19 | * format stages in scala code 20 | * @param pipeline - The pipeline to be set stages 21 | * @param value - A java.util.ArrayList of PipelineStages to be set as stages 22 | * @return The pipeline 23 | */ 24 | def setPipelineStages(pipeline: Pipeline, value: java.util.ArrayList[_ <: PipelineStage]): Pipeline = 25 | pipeline.setStages(value.asScala.toArray) 26 | } 27 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/main/scala/org/apache/spark/api/dotnet/DotnetRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.apache.spark.SparkContext 10 | import org.apache.spark.api.java.JavaRDD 11 | import org.apache.spark.api.python._ 12 | import org.apache.spark.rdd.RDD 13 | 14 | object DotnetRDD { 15 | def createPythonRDD( 16 | parent: RDD[_], 17 | func: PythonFunction, 18 | preservePartitoning: Boolean): PythonRDD = { 19 | new PythonRDD(parent, func, preservePartitoning) 20 | } 21 | 22 | def createJavaRDDFromArray( 23 | sc: SparkContext, 24 | arr: Array[Array[Byte]], 25 | numSlices: Int): JavaRDD[Array[Byte]] = { 26 | JavaRDD.fromRDD(sc.parallelize(arr, numSlices)) 27 | } 28 | 29 | def toJavaRDD(rdd: RDD[_]): JavaRDD[_] = JavaRDD.fromRDD(rdd) 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/main/scala/org/apache/spark/api/dotnet/DotnetRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.apache.spark.SparkContext 10 | import org.apache.spark.api.java.JavaRDD 11 | import org.apache.spark.api.python._ 12 | import org.apache.spark.rdd.RDD 13 | 14 | object DotnetRDD { 15 | def createPythonRDD( 16 | parent: RDD[_], 17 | func: PythonFunction, 18 | preservePartitoning: Boolean): PythonRDD = { 19 | new PythonRDD(parent, func, preservePartitoning) 20 | } 21 | 22 | def createJavaRDDFromArray( 23 | sc: SparkContext, 24 | arr: Array[Array[Byte]], 25 | numSlices: Int): JavaRDD[Array[Byte]] = { 26 | JavaRDD.fromRDD(sc.parallelize(arr, numSlices)) 27 | } 28 | 29 | def toJavaRDD(rdd: RDD[_]): JavaRDD[_] = JavaRDD.fromRDD(rdd) 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/main/scala/org/apache/spark/api/dotnet/DotnetRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.apache.spark.SparkContext 10 | import org.apache.spark.api.java.JavaRDD 11 | import org.apache.spark.api.python._ 12 | import org.apache.spark.rdd.RDD 13 | 14 | object DotnetRDD { 15 | def createPythonRDD( 16 | parent: RDD[_], 17 | func: PythonFunction, 18 | preservePartitoning: Boolean): PythonRDD = { 19 | new PythonRDD(parent, func, preservePartitoning) 20 | } 21 | 22 | def createJavaRDDFromArray( 23 | sc: SparkContext, 24 | arr: Array[Array[Byte]], 25 | numSlices: Int): JavaRDD[Array[Byte]] = { 26 | JavaRDD.fromRDD(sc.parallelize(arr, numSlices)) 27 | } 28 | 29 | def toJavaRDD(rdd: RDD[_]): JavaRDD[_] = JavaRDD.fromRDD(rdd) 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/main/scala/org/apache/spark/api/dotnet/DotnetRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 
4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.apache.spark.SparkContext 10 | import org.apache.spark.api.java.JavaRDD 11 | import org.apache.spark.api.python._ 12 | import org.apache.spark.rdd.RDD 13 | 14 | object DotnetRDD { 15 | def createPythonRDD( 16 | parent: RDD[_], 17 | func: PythonFunction, 18 | preservePartitoning: Boolean): PythonRDD = { 19 | new PythonRDD(parent, func, preservePartitoning) 20 | } 21 | 22 | def createJavaRDDFromArray( 23 | sc: SparkContext, 24 | arr: Array[Array[Byte]], 25 | numSlices: Int): JavaRDD[Array[Byte]] = { 26 | JavaRDD.fromRDD(sc.parallelize(arr, numSlices)) 27 | } 28 | 29 | def toJavaRDD(rdd: RDD[_]): JavaRDD[_] = JavaRDD.fromRDD(rdd) 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/main/scala/org/apache/spark/api/dotnet/DotnetRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.apache.spark.SparkContext 10 | import org.apache.spark.api.java.JavaRDD 11 | import org.apache.spark.api.python._ 12 | import org.apache.spark.rdd.RDD 13 | 14 | object DotnetRDD { 15 | def createPythonRDD( 16 | parent: RDD[_], 17 | func: PythonFunction, 18 | preservePartitoning: Boolean): PythonRDD = { 19 | new PythonRDD(parent, func, preservePartitoning) 20 | } 21 | 22 | def createJavaRDDFromArray( 23 | sc: SparkContext, 24 | arr: Array[Array[Byte]], 25 | numSlices: Int): JavaRDD[Array[Byte]] = { 26 | JavaRDD.fromRDD(sc.parallelize(arr, numSlices)) 27 | } 28 | 29 | def toJavaRDD(rdd: RDD[_]): JavaRDD[_] = JavaRDD.fromRDD(rdd) 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-4/src/main/scala/org/apache/spark/api/dotnet/DotnetRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.apache.spark.SparkContext 10 | import org.apache.spark.api.java.JavaRDD 11 | import org.apache.spark.api.python._ 12 | import org.apache.spark.rdd.RDD 13 | 14 | object DotnetRDD { 15 | def createPythonRDD( 16 | parent: RDD[_], 17 | func: PythonFunction, 18 | preservePartitoning: Boolean): PythonRDD = { 19 | new PythonRDD(parent, func, preservePartitoning) 20 | } 21 | 22 | def createJavaRDDFromArray( 23 | sc: SparkContext, 24 | arr: Array[Array[Byte]], 25 | numSlices: Int): JavaRDD[Array[Byte]] = { 26 | JavaRDD.fromRDD(sc.parallelize(arr, numSlices)) 27 | } 28 | 29 | def toJavaRDD(rdd: RDD[_]): JavaRDD[_] = JavaRDD.fromRDD(rdd) 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-5/src/main/scala/org/apache/spark/api/dotnet/DotnetRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 
3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.apache.spark.SparkContext 10 | import org.apache.spark.api.java.JavaRDD 11 | import org.apache.spark.api.python._ 12 | import org.apache.spark.rdd.RDD 13 | 14 | object DotnetRDD { 15 | def createPythonRDD( 16 | parent: RDD[_], 17 | func: PythonFunction, 18 | preservePartitoning: Boolean): PythonRDD = { 19 | new PythonRDD(parent, func, preservePartitoning) 20 | } 21 | 22 | def createJavaRDDFromArray( 23 | sc: SparkContext, 24 | arr: Array[Array[Byte]], 25 | numSlices: Int): JavaRDD[Array[Byte]] = { 26 | JavaRDD.fromRDD(sc.parallelize(arr, numSlices)) 27 | } 28 | 29 | def toJavaRDD(rdd: RDD[_]): JavaRDD[_] = JavaRDD.fromRDD(rdd) 30 | } 31 | -------------------------------------------------------------------------------- /eng/common/post-build/nuget-validation.ps1: -------------------------------------------------------------------------------- 1 | # This script validates NuGet package metadata information using this 2 | # tool: https://github.com/NuGet/NuGetGallery/tree/jver-verify/src/VerifyMicrosoftPackage 3 | 4 | param( 5 | [Parameter(Mandatory=$true)][string] $PackagesPath, # Path to where the packages to be validated are 6 | [Parameter(Mandatory=$true)][string] $ToolDestinationPath # Where the validation tool should be downloaded to 7 | ) 8 | 9 | try { 10 | . $PSScriptRoot\post-build-utils.ps1 11 | 12 | $url = 'https://raw.githubusercontent.com/NuGet/NuGetGallery/3e25ad135146676bcab0050a516939d9958bfa5d/src/VerifyMicrosoftPackage/verify.ps1' 13 | 14 | New-Item -ItemType 'directory' -Path ${ToolDestinationPath} -Force 15 | 16 | Invoke-WebRequest $url -OutFile ${ToolDestinationPath}\verify.ps1 17 | 18 | & ${ToolDestinationPath}\verify.ps1 ${PackagesPath}\*.nupkg 19 | } 20 | catch { 21 | Write-Host $_.ScriptStackTrace 22 | Write-PipelineTelemetryError -Category 'NuGetValidation' -Message $_ 23 | ExitWithExitCode 1 24 | } 25 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Sql/ArrowGroupedMapUdfWrapper.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | using Apache.Arrow; 7 | 8 | namespace Microsoft.Spark.Sql 9 | { 10 | /// 11 | /// Wraps the given Func object, which represents a Grouped Map UDF. 12 | /// 13 | /// 14 | /// UDF serialization requires a "wrapper" object in order to serialize/deserialize. 15 | /// 16 | [UdfWrapper] 17 | internal sealed class ArrowGroupedMapUdfWrapper 18 | { 19 | private readonly Func _func; 20 | 21 | internal ArrowGroupedMapUdfWrapper(Func func) 22 | { 23 | _func = func; 24 | } 25 | 26 | internal RecordBatch Execute(RecordBatch input) 27 | { 28 | return _func(input); 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/TriggerTests.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 
3 | // See the LICENSE file in the project root for more information. 4 | 5 | using Microsoft.Spark.Sql; 6 | using Microsoft.Spark.Sql.Streaming; 7 | using Xunit; 8 | 9 | namespace Microsoft.Spark.E2ETest.IpcTests 10 | { 11 | [Collection("Spark E2E Tests")] 12 | public class TriggerTests 13 | { 14 | /// 15 | /// Test Trigger's static functions 16 | /// 17 | [Fact] 18 | public void TestSignatures() 19 | { 20 | Assert.IsType(Trigger.Once()); 21 | 22 | Assert.IsType(Trigger.Continuous("1 seconds")); 23 | Assert.IsType(Trigger.Continuous(1000)); 24 | 25 | Assert.IsType(Trigger.ProcessingTime("1 seconds")); 26 | Assert.IsType(Trigger.ProcessingTime(1000)); 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /script/package-worker.ps1: -------------------------------------------------------------------------------- 1 | $version = $args[0] 2 | $worker_dir = $args[1] 3 | $output_dir = $args[2] 4 | 5 | $worker_version_dir = "Microsoft.Spark.Worker-$version" 6 | 7 | $frameworks = Get-ChildItem -Directory $worker_dir 8 | foreach ($framework in $frameworks) 9 | { 10 | $runtimes = Get-ChildItem -Directory $framework.FullName 11 | foreach ($runtime in $runtimes) 12 | { 13 | New-Item $worker_version_dir -ItemType Directory 14 | Copy-Item "$($runtime.FullName)\*" -Destination $worker_version_dir -Recurse 15 | $filename = "Microsoft.Spark.Worker.$framework.$runtime-$version" 16 | 17 | # Generate additional tar.gz worker files only for linux-x64. 18 | if ($runtime.Name.ToLower().Equals("linux-x64")) 19 | { 20 | tar czf "$output_dir/$filename.tar.gz" $worker_version_dir --force-local 21 | } 22 | 23 | Compress-Archive -DestinationPath "$output_dir/$filename.zip" -Path $worker_version_dir -CompressionLevel Optimal 24 | 25 | Remove-Item -Path $worker_version_dir -Recurse -Force 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/IpcTests/SerDeTests.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 
4 | 5 | using Microsoft.Spark.Sql; 6 | using Xunit; 7 | 8 | namespace Microsoft.Spark.E2ETest.IpcTests 9 | { 10 | [Collection("Spark E2E Tests")] 11 | public class SerDeTests 12 | { 13 | private readonly SparkSession _spark; 14 | 15 | public SerDeTests(SparkFixture fixture) 16 | { 17 | _spark = fixture.Spark; 18 | } 19 | 20 | [Fact] 21 | public void TestUnicode() 22 | { 23 | string expected = 24 | "①Ⅻㄨㄩ 啊阿鼾齄丂丄狚狛狜狝﨨﨩ˊˋ˙–⿻〇㐀㐁㐃㐄䶴䶵U1[]U2[]U3[]"; 25 | 26 | RuntimeConfig conf = _spark.Conf(); 27 | string key = "SerDeTests.TestUnicode"; 28 | conf.Set(key, expected); 29 | 30 | string actual = conf.Get(key); 31 | Assert.Equal(expected, actual); 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /eng/common/templates/steps/enable-internal-runtimes.yml: -------------------------------------------------------------------------------- 1 | # Obtains internal runtime download credentials and populates the 'dotnetbuilds-internal-container-read-token-base64' 2 | # variable with the base64-encoded SAS token, by default 3 | 4 | parameters: 5 | - name: federatedServiceConnection 6 | type: string 7 | default: 'dotnetbuilds-internal-read' 8 | - name: outputVariableName 9 | type: string 10 | default: 'dotnetbuilds-internal-container-read-token-base64' 11 | - name: expiryInHours 12 | type: number 13 | default: 1 14 | - name: base64Encode 15 | type: boolean 16 | default: true 17 | 18 | steps: 19 | - ${{ if ne(variables['System.TeamProject'], 'public') }}: 20 | - template: /eng/common/templates/steps/get-delegation-sas.yml 21 | parameters: 22 | federatedServiceConnection: ${{ parameters.federatedServiceConnection }} 23 | outputVariableName: ${{ parameters.outputVariableName }} 24 | expiryInHours: ${{ parameters.expiryInHours }} 25 | base64Encode: ${{ parameters.base64Encode }} 26 | storageAccount: dotnetbuilds 27 | container: internal 28 | permissions: rl 29 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net8.0 4 | 5 | 6 | 7 | PreserveNewest 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /eng/common/templates/steps/run-script-ifequalelse.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | # if parameter1 equals parameter 2, run 'ifScript' command, else run 'elsescript' command 3 | parameter1: '' 4 | parameter2: '' 5 | ifScript: '' 6 | elseScript: '' 7 | 8 | # name of script step 9 | name: Script 10 | 11 | # display name of script step 12 | displayName: If-Equal-Else Script 13 | 14 | # environment 15 | env: {} 16 | 17 | # conditional expression for step execution 18 | condition: '' 19 | 20 | steps: 21 | - ${{ if and(ne(parameters.ifScript, ''), eq(parameters.parameter1, parameters.parameter2)) }}: 22 | - script: ${{ parameters.ifScript }} 23 | name: ${{ parameters.name }} 24 | displayName: ${{ parameters.displayName }} 25 | env: ${{ parameters.env }} 26 | condition: ${{ parameters.condition }} 27 | 28 | - ${{ if and(ne(parameters.elseScript, ''), ne(parameters.parameter1, parameters.parameter2)) }}: 29 | - script: ${{ parameters.elseScript }} 30 | name: ${{ parameters.name }} 31 | displayName: ${{ parameters.displayName }} 32 | env: ${{ parameters.env }} 33 | condition: ${{ 
parameters.condition }} -------------------------------------------------------------------------------- /eng/common/templates-official/steps/enable-internal-runtimes.yml: -------------------------------------------------------------------------------- 1 | # Obtains internal runtime download credentials and populates the 'dotnetbuilds-internal-container-read-token-base64' 2 | # variable with the base64-encoded SAS token, by default 3 | 4 | parameters: 5 | - name: federatedServiceConnection 6 | type: string 7 | default: 'dotnetbuilds-internal-read' 8 | - name: outputVariableName 9 | type: string 10 | default: 'dotnetbuilds-internal-container-read-token-base64' 11 | - name: expiryInHours 12 | type: number 13 | default: 1 14 | - name: base64Encode 15 | type: boolean 16 | default: true 17 | 18 | steps: 19 | - ${{ if ne(variables['System.TeamProject'], 'public') }}: 20 | - template: /eng/common/templates-official/steps/get-delegation-sas.yml 21 | parameters: 22 | federatedServiceConnection: ${{ parameters.federatedServiceConnection }} 23 | outputVariableName: ${{ parameters.outputVariableName }} 24 | expiryInHours: ${{ parameters.expiryInHours }} 25 | base64Encode: ${{ parameters.base64Encode }} 26 | storageAccount: dotnetbuilds 27 | container: internal 28 | permissions: rl 29 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Sql/DataFrameGroupedMapUdfWrapper.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | using Apache.Arrow; 7 | using FxDataFrame = Microsoft.Data.Analysis.DataFrame; 8 | 9 | namespace Microsoft.Spark.Sql 10 | { 11 | /// 12 | /// Wraps the given Func object, which represents a Grouped Map UDF. 13 | /// 14 | /// 15 | /// UDF serialization requires a "wrapper" object in order to serialize/deserialize. 16 | /// 17 | [UdfWrapper] 18 | internal sealed class DataFrameGroupedMapUdfWrapper 19 | { 20 | private readonly Func _func; 21 | 22 | internal DataFrameGroupedMapUdfWrapper(Func func) 23 | { 24 | _func = func; 25 | } 26 | 27 | internal FxDataFrame Execute(FxDataFrame input) 28 | { 29 | return _func(input); 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /benchmark/csharp/Tpch/Tpch.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net48;net8.0 6 | net8.0 7 | Tpch 8 | Tpch 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | true 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/ML/Param/ParamPair.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using Microsoft.Spark.Interop; 6 | using Microsoft.Spark.Interop.Ipc; 7 | 8 | namespace Microsoft.Spark.ML.Feature.Param 9 | { 10 | /// 11 | /// A param and its value. 
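    /// For example (hypothetical param), pairing an existing "maxIter" Param with the
    /// value 10 constructs the corresponding org.apache.spark.ml.param.ParamPair on the JVM.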
12 | /// 13 | public sealed class ParamPair : IJvmObjectReferenceProvider 14 | { 15 | private static readonly string s_ParamPairClassName = "org.apache.spark.ml.param.ParamPair"; 16 | 17 | /// 18 | /// Creates a new instance of a 19 | /// 20 | public ParamPair(Param param, T value) 21 | : this(SparkEnvironment.JvmBridge.CallConstructor(s_ParamPairClassName, param, value)) 22 | { 23 | } 24 | 25 | internal ParamPair(JvmObjectReference jvmObject) => Reference = jvmObject; 26 | 27 | public JvmObjectReference Reference { get; private set; } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/TestEnvironment.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | using System.IO; 7 | 8 | namespace Microsoft.Spark.E2ETest 9 | { 10 | /// 11 | /// TestEnvironment provides functionalities related to E2E test environment. 12 | /// 13 | internal static class TestEnvironment 14 | { 15 | private static string s_resourceDirectory; 16 | internal static string ResourceDirectory 17 | { 18 | get 19 | { 20 | if (s_resourceDirectory is null) 21 | { 22 | s_resourceDirectory = 23 | Path.Combine( 24 | AppDomain.CurrentDomain.BaseDirectory, 25 | "Resources") 26 | + Path.DirectorySeparatorChar; 27 | } 28 | 29 | return s_resourceDirectory; 30 | } 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/main/scala/org/apache/spark/internal/config/dotnet/Dotnet.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.internal.config.dotnet 8 | 9 | import org.apache.spark.internal.config.ConfigBuilder 10 | 11 | private[spark] object Dotnet { 12 | val DOTNET_NUM_BACKEND_THREADS = ConfigBuilder("spark.dotnet.numDotnetBackendThreads").intConf 13 | .createWithDefault(10) 14 | 15 | val DOTNET_IGNORE_SPARK_PATCH_VERSION_CHECK = 16 | ConfigBuilder("spark.dotnet.ignoreSparkPatchVersionCheck").booleanConf 17 | .createWithDefault(false) 18 | 19 | val ERROR_REDIRECITON_ENABLED = 20 | ConfigBuilder("spark.nonjvm.error.forwarding.enabled").booleanConf 21 | .createWithDefault(false) 22 | 23 | val ERROR_BUFFER_SIZE = 24 | ConfigBuilder("spark.nonjvm.error.buffer.size") 25 | .intConf 26 | .checkValue(_ >= 0, "The error buffer size must not be negative") 27 | .createWithDefault(10240) 28 | } 29 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/main/scala/org/apache/spark/internal/config/dotnet/Dotnet.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.internal.config.dotnet 8 | 9 | import org.apache.spark.internal.config.ConfigBuilder 10 | 11 | private[spark] object Dotnet { 12 | val DOTNET_NUM_BACKEND_THREADS = ConfigBuilder("spark.dotnet.numDotnetBackendThreads").intConf 13 | .createWithDefault(10) 14 | 15 | val DOTNET_IGNORE_SPARK_PATCH_VERSION_CHECK = 16 | ConfigBuilder("spark.dotnet.ignoreSparkPatchVersionCheck").booleanConf 17 | .createWithDefault(false) 18 | 19 | val ERROR_REDIRECITON_ENABLED = 20 | ConfigBuilder("spark.nonjvm.error.forwarding.enabled").booleanConf 21 | .createWithDefault(false) 22 | 23 | val ERROR_BUFFER_SIZE = 24 | ConfigBuilder("spark.nonjvm.error.buffer.size") 25 | .intConf 26 | .checkValue(_ >= 0, "The error buffer size must not be negative") 27 | .createWithDefault(10240) 28 | } 29 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/main/scala/org/apache/spark/internal/config/dotnet/Dotnet.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.internal.config.dotnet 8 | 9 | import org.apache.spark.internal.config.ConfigBuilder 10 | 11 | private[spark] object Dotnet { 12 | val DOTNET_NUM_BACKEND_THREADS = ConfigBuilder("spark.dotnet.numDotnetBackendThreads").intConf 13 | .createWithDefault(10) 14 | 15 | val DOTNET_IGNORE_SPARK_PATCH_VERSION_CHECK = 16 | ConfigBuilder("spark.dotnet.ignoreSparkPatchVersionCheck").booleanConf 17 | .createWithDefault(false) 18 | 19 | val ERROR_REDIRECITON_ENABLED = 20 | ConfigBuilder("spark.nonjvm.error.forwarding.enabled").booleanConf 21 | .createWithDefault(false) 22 | 23 | val ERROR_BUFFER_SIZE = 24 | ConfigBuilder("spark.nonjvm.error.buffer.size") 25 | .intConf 26 | .checkValue(_ >= 0, "The error buffer size must not be negative") 27 | .createWithDefault(10240) 28 | } 29 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/main/scala/org/apache/spark/internal/config/dotnet/Dotnet.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.internal.config.dotnet 8 | 9 | import org.apache.spark.internal.config.ConfigBuilder 10 | 11 | private[spark] object Dotnet { 12 | val DOTNET_NUM_BACKEND_THREADS = ConfigBuilder("spark.dotnet.numDotnetBackendThreads").intConf 13 | .createWithDefault(10) 14 | 15 | val DOTNET_IGNORE_SPARK_PATCH_VERSION_CHECK = 16 | ConfigBuilder("spark.dotnet.ignoreSparkPatchVersionCheck").booleanConf 17 | .createWithDefault(false) 18 | 19 | val ERROR_REDIRECITON_ENABLED = 20 | ConfigBuilder("spark.nonjvm.error.forwarding.enabled").booleanConf 21 | .createWithDefault(false) 22 | 23 | val ERROR_BUFFER_SIZE = 24 | ConfigBuilder("spark.nonjvm.error.buffer.size") 25 | .intConf 26 | .checkValue(_ >= 0, "The error buffer size must not be negative") 27 | .createWithDefault(10240) 28 | } 29 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/main/scala/org/apache/spark/internal/config/dotnet/Dotnet.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.internal.config.dotnet 8 | 9 | import org.apache.spark.internal.config.ConfigBuilder 10 | 11 | private[spark] object Dotnet { 12 | val DOTNET_NUM_BACKEND_THREADS = ConfigBuilder("spark.dotnet.numDotnetBackendThreads").intConf 13 | .createWithDefault(10) 14 | 15 | val DOTNET_IGNORE_SPARK_PATCH_VERSION_CHECK = 16 | ConfigBuilder("spark.dotnet.ignoreSparkPatchVersionCheck").booleanConf 17 | .createWithDefault(false) 18 | 19 | val ERROR_REDIRECITON_ENABLED = 20 | ConfigBuilder("spark.nonjvm.error.forwarding.enabled").booleanConf 21 | .createWithDefault(false) 22 | 23 | val ERROR_BUFFER_SIZE = 24 | ConfigBuilder("spark.nonjvm.error.buffer.size") 25 | .intConf 26 | .checkValue(_ >= 0, "The error buffer size must not be negative") 27 | .createWithDefault(10240) 28 | } 29 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-4/src/main/scala/org/apache/spark/internal/config/dotnet/Dotnet.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.internal.config.dotnet 8 | 9 | import org.apache.spark.internal.config.ConfigBuilder 10 | 11 | private[spark] object Dotnet { 12 | val DOTNET_NUM_BACKEND_THREADS = ConfigBuilder("spark.dotnet.numDotnetBackendThreads").intConf 13 | .createWithDefault(10) 14 | 15 | val DOTNET_IGNORE_SPARK_PATCH_VERSION_CHECK = 16 | ConfigBuilder("spark.dotnet.ignoreSparkPatchVersionCheck").booleanConf 17 | .createWithDefault(false) 18 | 19 | val ERROR_REDIRECITON_ENABLED = 20 | ConfigBuilder("spark.nonjvm.error.forwarding.enabled").booleanConf 21 | .createWithDefault(false) 22 | 23 | val ERROR_BUFFER_SIZE = 24 | ConfigBuilder("spark.nonjvm.error.buffer.size") 25 | .intConf 26 | .checkValue(_ >= 0, "The error buffer size must not be negative") 27 | .createWithDefault(10240) 28 | } 29 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-5/src/main/scala/org/apache/spark/internal/config/dotnet/Dotnet.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.internal.config.dotnet 8 | 9 | import org.apache.spark.internal.config.ConfigBuilder 10 | 11 | private[spark] object Dotnet { 12 | val DOTNET_NUM_BACKEND_THREADS = ConfigBuilder("spark.dotnet.numDotnetBackendThreads").intConf 13 | .createWithDefault(10) 14 | 15 | val DOTNET_IGNORE_SPARK_PATCH_VERSION_CHECK = 16 | ConfigBuilder("spark.dotnet.ignoreSparkPatchVersionCheck").booleanConf 17 | .createWithDefault(false) 18 | 19 | val ERROR_REDIRECITON_ENABLED = 20 | ConfigBuilder("spark.nonjvm.error.forwarding.enabled").booleanConf 21 | .createWithDefault(false) 22 | 23 | val ERROR_BUFFER_SIZE = 24 | ConfigBuilder("spark.nonjvm.error.buffer.size") 25 | .intConf 26 | .checkValue(_ >= 0, "The error buffer size must not be negative") 27 | .createWithDefault(10240) 28 | } 29 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/IpcTests/JvmBridgeTests.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 
4 | 5 | using System; 6 | using Microsoft.Spark.Sql; 7 | using Xunit; 8 | 9 | namespace Microsoft.Spark.E2ETest.IpcTests 10 | { 11 | [Collection("Spark E2E Tests")] 12 | public class JvmBridgeTests 13 | { 14 | private readonly SparkSession _spark; 15 | 16 | public JvmBridgeTests(SparkFixture fixture) 17 | { 18 | _spark = fixture.Spark; 19 | } 20 | 21 | [Fact] 22 | public void TestInnerJvmException() 23 | { 24 | try 25 | { 26 | _spark.Sql("THROW!!!"); 27 | } 28 | catch (Exception ex) 29 | { 30 | Assert.NotNull(ex.InnerException); 31 | Assert.IsType(ex.InnerException); 32 | Assert.False(string.IsNullOrWhiteSpace(ex.InnerException.Message)); 33 | } 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/test/scala/org/apache/spark/api/dotnet/DotnetBackendTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import org.junit.Assert._ 11 | import org.junit.{After, Before, Test} 12 | 13 | import java.net.InetAddress 14 | 15 | @Test 16 | class DotnetBackendTest { 17 | private var backend: DotnetBackend = _ 18 | 19 | @Before 20 | def before(): Unit = { 21 | backend = new DotnetBackend 22 | } 23 | 24 | @After 25 | def after(): Unit = { 26 | backend.close() 27 | } 28 | 29 | @Test 30 | def shouldNotResetCallbackClient(): Unit = { 31 | // Specifying port = 0 to select port dynamically. 32 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 33 | 34 | assertTrue(backend.callbackClient.isDefined) 35 | assertThrows(classOf[Exception], () => { 36 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 37 | }) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/test/scala/org/apache/spark/api/dotnet/DotnetBackendTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import org.junit.Assert._ 11 | import org.junit.{After, Before, Test} 12 | 13 | import java.net.InetAddress 14 | 15 | @Test 16 | class DotnetBackendTest { 17 | private var backend: DotnetBackend = _ 18 | 19 | @Before 20 | def before(): Unit = { 21 | backend = new DotnetBackend 22 | } 23 | 24 | @After 25 | def after(): Unit = { 26 | backend.close() 27 | } 28 | 29 | @Test 30 | def shouldNotResetCallbackClient(): Unit = { 31 | // Specifying port = 0 to select port dynamically. 
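    // The first registration should succeed (callbackClient becomes defined); a second
    // registration attempt is expected to throw, as asserted below.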
32 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 33 | 34 | assertTrue(backend.callbackClient.isDefined) 35 | assertThrows(classOf[Exception], () => { 36 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 37 | }) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/test/scala/org/apache/spark/api/dotnet/DotnetBackendTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import org.junit.Assert._ 11 | import org.junit.{After, Before, Test} 12 | 13 | import java.net.InetAddress 14 | 15 | @Test 16 | class DotnetBackendTest { 17 | private var backend: DotnetBackend = _ 18 | 19 | @Before 20 | def before(): Unit = { 21 | backend = new DotnetBackend 22 | } 23 | 24 | @After 25 | def after(): Unit = { 26 | backend.close() 27 | } 28 | 29 | @Test 30 | def shouldNotResetCallbackClient(): Unit = { 31 | // Specifying port = 0 to select port dynamically. 32 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 33 | 34 | assertTrue(backend.callbackClient.isDefined) 35 | assertThrows(classOf[Exception], () => { 36 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 37 | }) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/test/scala/org/apache/spark/api/dotnet/DotnetBackendTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import org.junit.Assert._ 11 | import org.junit.{After, Before, Test} 12 | 13 | import java.net.InetAddress 14 | 15 | @Test 16 | class DotnetBackendTest { 17 | private var backend: DotnetBackend = _ 18 | 19 | @Before 20 | def before(): Unit = { 21 | backend = new DotnetBackend 22 | } 23 | 24 | @After 25 | def after(): Unit = { 26 | backend.close() 27 | } 28 | 29 | @Test 30 | def shouldNotResetCallbackClient(): Unit = { 31 | // Specifying port = 0 to select port dynamically. 32 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 33 | 34 | assertTrue(backend.callbackClient.isDefined) 35 | assertThrows(classOf[Exception], () => { 36 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 37 | }) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-4/src/test/scala/org/apache/spark/api/dotnet/DotnetBackendTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import org.junit.Assert._ 11 | import org.junit.{After, Before, Test} 12 | 13 | import java.net.InetAddress 14 | 15 | @Test 16 | class DotnetBackendTest { 17 | private var backend: DotnetBackend = _ 18 | 19 | @Before 20 | def before(): Unit = { 21 | backend = new DotnetBackend 22 | } 23 | 24 | @After 25 | def after(): Unit = { 26 | backend.close() 27 | } 28 | 29 | @Test 30 | def shouldNotResetCallbackClient(): Unit = { 31 | // Specifying port = 0 to select port dynamically. 32 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 33 | 34 | assertTrue(backend.callbackClient.isDefined) 35 | assertThrows(classOf[Exception], () => { 36 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 37 | }) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-5/src/test/scala/org/apache/spark/api/dotnet/DotnetBackendTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | 8 | package org.apache.spark.api.dotnet 9 | 10 | import org.junit.Assert._ 11 | import org.junit.{After, Before, Test} 12 | 13 | import java.net.InetAddress 14 | 15 | @Test 16 | class DotnetBackendTest { 17 | private var backend: DotnetBackend = _ 18 | 19 | @Before 20 | def before(): Unit = { 21 | backend = new DotnetBackend 22 | } 23 | 24 | @After 25 | def after(): Unit = { 26 | backend.close() 27 | } 28 | 29 | @Test 30 | def shouldNotResetCallbackClient(): Unit = { 31 | // Specifying port = 0 to select port dynamically. 32 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 33 | 34 | assertTrue(backend.callbackClient.isDefined) 35 | assertThrows(classOf[Exception], () => { 36 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 37 | }) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/main/scala/org/apache/spark/sql/test/TestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.test 8 | 9 | import org.apache.spark.sql.SQLContext 10 | import org.apache.spark.sql.execution.streaming.MemoryStream 11 | 12 | object TestUtils { 13 | 14 | /** 15 | * Helper method to create typed MemoryStreams intended for use in unit tests. 16 | * @param sqlContext The SQLContext. 17 | * @param streamType The type of memory stream to create. This string is the `Name` 18 | * property of the dotnet type. 19 | * @return A typed MemoryStream. 
20 | */ 21 | def createMemoryStream(implicit sqlContext: SQLContext, streamType: String): MemoryStream[_] = { 22 | import sqlContext.implicits._ 23 | 24 | streamType match { 25 | case "Int32" => MemoryStream[Int] 26 | case "String" => MemoryStream[String] 27 | case _ => throw new Exception(s"$streamType not supported") 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/main/scala/org/apache/spark/sql/test/TestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.test 8 | 9 | import org.apache.spark.sql.SQLContext 10 | import org.apache.spark.sql.execution.streaming.MemoryStream 11 | 12 | object TestUtils { 13 | 14 | /** 15 | * Helper method to create typed MemoryStreams intended for use in unit tests. 16 | * @param sqlContext The SQLContext. 17 | * @param streamType The type of memory stream to create. This string is the `Name` 18 | * property of the dotnet type. 19 | * @return A typed MemoryStream. 20 | */ 21 | def createMemoryStream(implicit sqlContext: SQLContext, streamType: String): MemoryStream[_] = { 22 | import sqlContext.implicits._ 23 | 24 | streamType match { 25 | case "Int32" => MemoryStream[Int] 26 | case "String" => MemoryStream[String] 27 | case _ => throw new Exception(s"$streamType not supported") 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/main/scala/org/apache/spark/sql/test/TestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.test 8 | 9 | import org.apache.spark.sql.SQLContext 10 | import org.apache.spark.sql.execution.streaming.MemoryStream 11 | 12 | object TestUtils { 13 | 14 | /** 15 | * Helper method to create typed MemoryStreams intended for use in unit tests. 16 | * @param sqlContext The SQLContext. 17 | * @param streamType The type of memory stream to create. This string is the `Name` 18 | * property of the dotnet type. 19 | * @return A typed MemoryStream. 20 | */ 21 | def createMemoryStream(implicit sqlContext: SQLContext, streamType: String): MemoryStream[_] = { 22 | import sqlContext.implicits._ 23 | 24 | streamType match { 25 | case "Int32" => MemoryStream[Int] 26 | case "String" => MemoryStream[String] 27 | case _ => throw new Exception(s"$streamType not supported") 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/main/scala/org/apache/spark/sql/test/TestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.sql.test 8 | 9 | import org.apache.spark.sql.SQLContext 10 | import org.apache.spark.sql.execution.streaming.MemoryStream 11 | 12 | object TestUtils { 13 | 14 | /** 15 | * Helper method to create typed MemoryStreams intended for use in unit tests. 16 | * @param sqlContext The SQLContext. 17 | * @param streamType The type of memory stream to create. This string is the `Name` 18 | * property of the dotnet type. 19 | * @return A typed MemoryStream. 20 | */ 21 | def createMemoryStream(implicit sqlContext: SQLContext, streamType: String): MemoryStream[_] = { 22 | import sqlContext.implicits._ 23 | 24 | streamType match { 25 | case "Int32" => MemoryStream[Int] 26 | case "String" => MemoryStream[String] 27 | case _ => throw new Exception(s"$streamType not supported") 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/main/scala/org/apache/spark/sql/test/TestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.test 8 | 9 | import org.apache.spark.sql.SQLContext 10 | import org.apache.spark.sql.execution.streaming.MemoryStream 11 | 12 | object TestUtils { 13 | 14 | /** 15 | * Helper method to create typed MemoryStreams intended for use in unit tests. 16 | * @param sqlContext The SQLContext. 17 | * @param streamType The type of memory stream to create. This string is the `Name` 18 | * property of the dotnet type. 19 | * @return A typed MemoryStream. 20 | */ 21 | def createMemoryStream(implicit sqlContext: SQLContext, streamType: String): MemoryStream[_] = { 22 | import sqlContext.implicits._ 23 | 24 | streamType match { 25 | case "Int32" => MemoryStream[Int] 26 | case "String" => MemoryStream[String] 27 | case _ => throw new Exception(s"$streamType not supported") 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-4/src/main/scala/org/apache/spark/sql/test/TestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.test 8 | 9 | import org.apache.spark.sql.SQLContext 10 | import org.apache.spark.sql.execution.streaming.MemoryStream 11 | 12 | object TestUtils { 13 | 14 | /** 15 | * Helper method to create typed MemoryStreams intended for use in unit tests. 16 | * @param sqlContext The SQLContext. 17 | * @param streamType The type of memory stream to create. This string is the `Name` 18 | * property of the dotnet type. 19 | * @return A typed MemoryStream. 
20 | */ 21 | def createMemoryStream(implicit sqlContext: SQLContext, streamType: String): MemoryStream[_] = { 22 | import sqlContext.implicits._ 23 | 24 | streamType match { 25 | case "Int32" => MemoryStream[Int] 26 | case "String" => MemoryStream[String] 27 | case _ => throw new Exception(s"$streamType not supported") 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-5/src/main/scala/org/apache/spark/sql/test/TestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.test 8 | 9 | import org.apache.spark.sql.SQLContext 10 | import org.apache.spark.sql.execution.streaming.MemoryStream 11 | 12 | object TestUtils { 13 | 14 | /** 15 | * Helper method to create typed MemoryStreams intended for use in unit tests. 16 | * @param sqlContext The SQLContext. 17 | * @param streamType The type of memory stream to create. This string is the `Name` 18 | * property of the dotnet type. 19 | * @return A typed MemoryStream. 20 | */ 21 | def createMemoryStream(implicit sqlContext: SQLContext, streamType: String): MemoryStream[_] = { 22 | import sqlContext.implicits._ 23 | 24 | streamType match { 25 | case "Int32" => MemoryStream[Int] 26 | case "String" => MemoryStream[String] 27 | case _ => throw new Exception(s"$streamType not supported") 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /examples/Microsoft.Spark.FSharp.Examples/Microsoft.Spark.FSharp.Examples.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net48;net8.0 6 | net8.0 7 | Microsoft.Spark.Examples 8 | Microsoft.Spark.FSharp.Examples 9 | false 10 | 11 | 2003;$(NoWarn) 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) .NET Foundation and Contributors 4 | 5 | All rights reserved. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Utils/EnvironmentUtils.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | 7 | namespace Microsoft.Spark.Utils 8 | { 9 | /// 10 | /// Various environment utility methods. 11 | /// 12 | internal static class EnvironmentUtils 13 | { 14 | internal static bool GetEnvironmentVariableAsBool(string name) 15 | { 16 | string str = Environment.GetEnvironmentVariable(name); 17 | if (string.IsNullOrEmpty(str)) 18 | { 19 | return false; 20 | } 21 | 22 | switch (str.ToLowerInvariant()) 23 | { 24 | case "true": 25 | case "1": 26 | case "yes": 27 | return true; 28 | case "false": 29 | case "0": 30 | case "no": 31 | return false; 32 | default: 33 | return false; 34 | } 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.UnitTest/CollectionUtilsTests.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using Microsoft.Spark.Utils; 6 | using Xunit; 7 | 8 | namespace Microsoft.Spark.UnitTest 9 | { 10 | public class CollectionUtilsTests 11 | { 12 | [Fact] 13 | public void TestArrayEquals() 14 | { 15 | Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, null)); 16 | Assert.False(CollectionUtils.ArrayEquals(null, new int[] { 1 })); 17 | Assert.False(CollectionUtils.ArrayEquals(new int[] { }, new int[] { 1 })); 18 | Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { })); 19 | Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { 1, 2 })); 20 | Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { 2 })); 21 | 22 | Assert.True(CollectionUtils.ArrayEquals(null, null)); 23 | Assert.True(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { 1 })); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/main/scala/org/apache/spark/sql/api/dotnet/SQLUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import java.util.{List => JList, Map => JMap} 10 | 11 | import org.apache.spark.api.python.{PythonAccumulatorV2, PythonBroadcast, PythonFunction} 12 | import org.apache.spark.broadcast.Broadcast 13 | 14 | object SQLUtils { 15 | 16 | /** 17 | * Exposes createPythonFunction to the .NET client to enable registering UDFs. 
18 | */ 19 | def createPythonFunction( 20 | command: Array[Byte], 21 | envVars: JMap[String, String], 22 | pythonIncludes: JList[String], 23 | pythonExec: String, 24 | pythonVersion: String, 25 | broadcastVars: JList[Broadcast[PythonBroadcast]], 26 | accumulator: PythonAccumulatorV2): PythonFunction = { 27 | 28 | PythonFunction( 29 | command, 30 | envVars, 31 | pythonIncludes, 32 | pythonExec, 33 | pythonVersion, 34 | broadcastVars, 35 | accumulator) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/main/scala/org/apache/spark/sql/api/dotnet/SQLUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import java.util.{List => JList, Map => JMap} 10 | 11 | import org.apache.spark.api.python.{PythonAccumulatorV2, PythonBroadcast, PythonFunction} 12 | import org.apache.spark.broadcast.Broadcast 13 | 14 | object SQLUtils { 15 | 16 | /** 17 | * Exposes createPythonFunction to the .NET client to enable registering UDFs. 18 | */ 19 | def createPythonFunction( 20 | command: Array[Byte], 21 | envVars: JMap[String, String], 22 | pythonIncludes: JList[String], 23 | pythonExec: String, 24 | pythonVersion: String, 25 | broadcastVars: JList[Broadcast[PythonBroadcast]], 26 | accumulator: PythonAccumulatorV2): PythonFunction = { 27 | 28 | PythonFunction( 29 | command, 30 | envVars, 31 | pythonIncludes, 32 | pythonExec, 33 | pythonVersion, 34 | broadcastVars, 35 | accumulator) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/main/scala/org/apache/spark/sql/api/dotnet/SQLUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import java.util.{List => JList, Map => JMap} 10 | 11 | import org.apache.spark.api.python.{PythonAccumulatorV2, PythonBroadcast, PythonFunction} 12 | import org.apache.spark.broadcast.Broadcast 13 | 14 | object SQLUtils { 15 | 16 | /** 17 | * Exposes createPythonFunction to the .NET client to enable registering UDFs. 18 | */ 19 | def createPythonFunction( 20 | command: Array[Byte], 21 | envVars: JMap[String, String], 22 | pythonIncludes: JList[String], 23 | pythonExec: String, 24 | pythonVersion: String, 25 | broadcastVars: JList[Broadcast[PythonBroadcast]], 26 | accumulator: PythonAccumulatorV2): PythonFunction = { 27 | 28 | PythonFunction( 29 | command, 30 | envVars, 31 | pythonIncludes, 32 | pythonExec, 33 | pythonVersion, 34 | broadcastVars, 35 | accumulator) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/main/scala/org/apache/spark/sql/api/dotnet/SQLUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 
4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import java.util.{List => JList, Map => JMap} 10 | 11 | import org.apache.spark.api.python.{PythonAccumulatorV2, PythonBroadcast, PythonFunction} 12 | import org.apache.spark.broadcast.Broadcast 13 | 14 | object SQLUtils { 15 | 16 | /** 17 | * Exposes createPythonFunction to the .NET client to enable registering UDFs. 18 | */ 19 | def createPythonFunction( 20 | command: Array[Byte], 21 | envVars: JMap[String, String], 22 | pythonIncludes: JList[String], 23 | pythonExec: String, 24 | pythonVersion: String, 25 | broadcastVars: JList[Broadcast[PythonBroadcast]], 26 | accumulator: PythonAccumulatorV2): PythonFunction = { 27 | 28 | PythonFunction( 29 | command, 30 | envVars, 31 | pythonIncludes, 32 | pythonExec, 33 | pythonVersion, 34 | broadcastVars, 35 | accumulator) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/main/scala/org/apache/spark/sql/api/dotnet/SQLUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import java.util.{List => JList, Map => JMap} 10 | 11 | import org.apache.spark.api.python.{PythonAccumulatorV2, PythonBroadcast, PythonFunction} 12 | import org.apache.spark.broadcast.Broadcast 13 | 14 | object SQLUtils { 15 | 16 | /** 17 | * Exposes createPythonFunction to the .NET client to enable registering UDFs. 18 | */ 19 | def createPythonFunction( 20 | command: Array[Byte], 21 | envVars: JMap[String, String], 22 | pythonIncludes: JList[String], 23 | pythonExec: String, 24 | pythonVersion: String, 25 | broadcastVars: JList[Broadcast[PythonBroadcast]], 26 | accumulator: PythonAccumulatorV2): PythonFunction = { 27 | 28 | PythonFunction( 29 | command, 30 | envVars, 31 | pythonIncludes, 32 | pythonExec, 33 | pythonVersion, 34 | broadcastVars, 35 | accumulator) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Param/ParamTests.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 
4 | 5 | using Microsoft.Spark.ML.Feature.Param; 6 | using Microsoft.Spark.Sql; 7 | using Xunit; 8 | 9 | namespace Microsoft.Spark.E2ETest.IpcTests.ML.ParamTests 10 | { 11 | [Collection("Spark E2E Tests")] 12 | public class ParamTests 13 | { 14 | private readonly SparkSession _spark; 15 | 16 | public ParamTests(SparkFixture fixture) 17 | { 18 | _spark = fixture.Spark; 19 | } 20 | 21 | [Fact] 22 | public void Test() 23 | { 24 | const string expectedParent = "parent"; 25 | const string expectedName = "name"; 26 | const string expectedDoc = "doc"; 27 | 28 | var param = new Param(expectedParent, expectedName, expectedDoc); 29 | 30 | Assert.Equal(expectedParent, param.Parent); 31 | Assert.Equal(expectedDoc, param.Doc); 32 | Assert.Equal(expectedName, param.Name); 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.E2ETest/Utils/SQLUtils.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | using System.Collections.Generic; 7 | using System.Linq; 8 | using Microsoft.Spark.Sql; 9 | 10 | namespace Microsoft.Spark.E2ETest.Utils 11 | { 12 | internal static class SQLUtils 13 | { 14 | /// <summary> 15 | /// Drops tables in <paramref name="tableNames"/> after calling <paramref name="action"/>. 16 | /// </summary> 17 | /// <param name="spark">The <see cref="SparkSession"/></param> 18 | /// <param name="tableNames">Names of the tables to drop</param> 19 | /// <param name="action"><see cref="Action"/> to execute.</param> 20 | public static void WithTable(SparkSession spark, IEnumerable<string> tableNames, Action action) 21 | { 22 | try 23 | { 24 | action(); 25 | } 26 | finally 27 | { 28 | tableNames.ToList().ForEach(name => spark.Sql($"DROP TABLE IF EXISTS {name}")); 29 | } 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /benchmark/csharp/Tpch/VectorDataFrameFunctions.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | using Microsoft.Data.Analysis; 7 | 8 | namespace Tpch 9 | { 10 | internal static class VectorDataFrameFunctions 11 | { 12 | internal static DoubleDataFrameColumn ComputeTotal(DoubleDataFrameColumn price, DoubleDataFrameColumn discount, DoubleDataFrameColumn tax) 13 | { 14 | if ((price.Length != discount.Length) || (price.Length != tax.Length)) 15 | { 16 | throw new ArgumentException("Arrays need to be the same length"); 17 | } 18 | 19 | return price * (1 - discount) * (1 + tax); 20 | } 21 | 22 | internal static DoubleDataFrameColumn ComputeDiscountPrice(DoubleDataFrameColumn price, DoubleDataFrameColumn discount) 23 | { 24 | if (price.Length != discount.Length) 25 | { 26 | throw new ArgumentException("Arrays need to be the same length"); 27 | } 28 | 29 | return price * (1 - discount); 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /benchmark/csharp/Tpch/TpchBase.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information.
4 | 5 | using System.IO; 6 | using Microsoft.Spark.Sql; 7 | 8 | namespace Tpch 9 | { 10 | internal class TpchBase 11 | { 12 | protected readonly DataFrame _customer, _lineitem, _nation, _orders, 13 | _part, _partsupp, _region, _supplier; 14 | 15 | internal TpchBase(string tpchRoot, SparkSession spark) 16 | { 17 | // Load all the TPC-H tables. 18 | tpchRoot += Path.DirectorySeparatorChar; 19 | _customer = spark.Read().Parquet($"{tpchRoot}customer"); 20 | _lineitem = spark.Read().Parquet($"{tpchRoot}lineitem"); 21 | _nation = spark.Read().Parquet($"{tpchRoot}nation"); 22 | _orders = spark.Read().Parquet($"{tpchRoot}orders"); 23 | _part = spark.Read().Parquet($"{tpchRoot}part"); 24 | _partsupp = spark.Read().Parquet($"{tpchRoot}partsupp"); 25 | _region = spark.Read().Parquet($"{tpchRoot}region"); 26 | _supplier = spark.Read().Parquet($"{tpchRoot}supplier"); 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/test/scala/org/apache/spark/api/dotnet/JVMObjectTrackerTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.junit.Test 10 | 11 | @Test 12 | class JVMObjectTrackerTest { 13 | 14 | @Test 15 | def shouldReleaseAllReferences(): Unit = { 16 | val tracker = new JVMObjectTracker 17 | val firstId = tracker.put(new Object) 18 | val secondId = tracker.put(new Object) 19 | val thirdId = tracker.put(new Object) 20 | 21 | tracker.clear() 22 | 23 | assert(tracker.get(firstId).isEmpty) 24 | assert(tracker.get(secondId).isEmpty) 25 | assert(tracker.get(thirdId).isEmpty) 26 | } 27 | 28 | @Test 29 | def shouldResetCounter(): Unit = { 30 | val tracker = new JVMObjectTracker 31 | val firstId = tracker.put(new Object) 32 | val secondId = tracker.put(new Object) 33 | 34 | tracker.clear() 35 | 36 | val thirdId = tracker.put(new Object) 37 | 38 | assert(firstId.equals("1")) 39 | assert(secondId.equals("2")) 40 | assert(thirdId.equals("1")) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/test/scala/org/apache/spark/api/dotnet/JVMObjectTrackerTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.junit.Test 10 | 11 | @Test 12 | class JVMObjectTrackerTest { 13 | 14 | @Test 15 | def shouldReleaseAllReferences(): Unit = { 16 | val tracker = new JVMObjectTracker 17 | val firstId = tracker.put(new Object) 18 | val secondId = tracker.put(new Object) 19 | val thirdId = tracker.put(new Object) 20 | 21 | tracker.clear() 22 | 23 | assert(tracker.get(firstId).isEmpty) 24 | assert(tracker.get(secondId).isEmpty) 25 | assert(tracker.get(thirdId).isEmpty) 26 | } 27 | 28 | @Test 29 | def shouldResetCounter(): Unit = { 30 | val tracker = new JVMObjectTracker 31 | val firstId = tracker.put(new Object) 32 | val secondId = tracker.put(new Object) 33 | 34 | tracker.clear() 35 | 36 | val thirdId = tracker.put(new Object) 37 | 38 | assert(firstId.equals("1")) 39 | assert(secondId.equals("2")) 40 | assert(thirdId.equals("1")) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/test/scala/org/apache/spark/api/dotnet/JVMObjectTrackerTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.junit.Test 10 | 11 | @Test 12 | class JVMObjectTrackerTest { 13 | 14 | @Test 15 | def shouldReleaseAllReferences(): Unit = { 16 | val tracker = new JVMObjectTracker 17 | val firstId = tracker.put(new Object) 18 | val secondId = tracker.put(new Object) 19 | val thirdId = tracker.put(new Object) 20 | 21 | tracker.clear() 22 | 23 | assert(tracker.get(firstId).isEmpty) 24 | assert(tracker.get(secondId).isEmpty) 25 | assert(tracker.get(thirdId).isEmpty) 26 | } 27 | 28 | @Test 29 | def shouldResetCounter(): Unit = { 30 | val tracker = new JVMObjectTracker 31 | val firstId = tracker.put(new Object) 32 | val secondId = tracker.put(new Object) 33 | 34 | tracker.clear() 35 | 36 | val thirdId = tracker.put(new Object) 37 | 38 | assert(firstId.equals("1")) 39 | assert(secondId.equals("2")) 40 | assert(thirdId.equals("1")) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/test/scala/org/apache/spark/api/dotnet/JVMObjectTrackerTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.junit.Test 10 | 11 | @Test 12 | class JVMObjectTrackerTest { 13 | 14 | @Test 15 | def shouldReleaseAllReferences(): Unit = { 16 | val tracker = new JVMObjectTracker 17 | val firstId = tracker.put(new Object) 18 | val secondId = tracker.put(new Object) 19 | val thirdId = tracker.put(new Object) 20 | 21 | tracker.clear() 22 | 23 | assert(tracker.get(firstId).isEmpty) 24 | assert(tracker.get(secondId).isEmpty) 25 | assert(tracker.get(thirdId).isEmpty) 26 | } 27 | 28 | @Test 29 | def shouldResetCounter(): Unit = { 30 | val tracker = new JVMObjectTracker 31 | val firstId = tracker.put(new Object) 32 | val secondId = tracker.put(new Object) 33 | 34 | tracker.clear() 35 | 36 | val thirdId = tracker.put(new Object) 37 | 38 | assert(firstId.equals("1")) 39 | assert(secondId.equals("2")) 40 | assert(thirdId.equals("1")) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/test/scala/org/apache/spark/api/dotnet/JVMObjectTrackerTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.junit.Test 10 | 11 | @Test 12 | class JVMObjectTrackerTest { 13 | 14 | @Test 15 | def shouldReleaseAllReferences(): Unit = { 16 | val tracker = new JVMObjectTracker 17 | val firstId = tracker.put(new Object) 18 | val secondId = tracker.put(new Object) 19 | val thirdId = tracker.put(new Object) 20 | 21 | tracker.clear() 22 | 23 | assert(tracker.get(firstId).isEmpty) 24 | assert(tracker.get(secondId).isEmpty) 25 | assert(tracker.get(thirdId).isEmpty) 26 | } 27 | 28 | @Test 29 | def shouldResetCounter(): Unit = { 30 | val tracker = new JVMObjectTracker 31 | val firstId = tracker.put(new Object) 32 | val secondId = tracker.put(new Object) 33 | 34 | tracker.clear() 35 | 36 | val thirdId = tracker.put(new Object) 37 | 38 | assert(firstId.equals("1")) 39 | assert(secondId.equals("2")) 40 | assert(thirdId.equals("1")) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-4/src/test/scala/org/apache/spark/api/dotnet/JVMObjectTrackerTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.junit.Test 10 | 11 | @Test 12 | class JVMObjectTrackerTest { 13 | 14 | @Test 15 | def shouldReleaseAllReferences(): Unit = { 16 | val tracker = new JVMObjectTracker 17 | val firstId = tracker.put(new Object) 18 | val secondId = tracker.put(new Object) 19 | val thirdId = tracker.put(new Object) 20 | 21 | tracker.clear() 22 | 23 | assert(tracker.get(firstId).isEmpty) 24 | assert(tracker.get(secondId).isEmpty) 25 | assert(tracker.get(thirdId).isEmpty) 26 | } 27 | 28 | @Test 29 | def shouldResetCounter(): Unit = { 30 | val tracker = new JVMObjectTracker 31 | val firstId = tracker.put(new Object) 32 | val secondId = tracker.put(new Object) 33 | 34 | tracker.clear() 35 | 36 | val thirdId = tracker.put(new Object) 37 | 38 | assert(firstId.equals("1")) 39 | assert(secondId.equals("2")) 40 | assert(thirdId.equals("1")) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-5/src/test/scala/org/apache/spark/api/dotnet/JVMObjectTrackerTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.junit.Test 10 | 11 | @Test 12 | class JVMObjectTrackerTest { 13 | 14 | @Test 15 | def shouldReleaseAllReferences(): Unit = { 16 | val tracker = new JVMObjectTracker 17 | val firstId = tracker.put(new Object) 18 | val secondId = tracker.put(new Object) 19 | val thirdId = tracker.put(new Object) 20 | 21 | tracker.clear() 22 | 23 | assert(tracker.get(firstId).isEmpty) 24 | assert(tracker.get(secondId).isEmpty) 25 | assert(tracker.get(thirdId).isEmpty) 26 | } 27 | 28 | @Test 29 | def shouldResetCounter(): Unit = { 30 | val tracker = new JVMObjectTracker 31 | val firstId = tracker.put(new Object) 32 | val secondId = tracker.put(new Object) 33 | 34 | tracker.clear() 35 | 36 | val thirdId = tracker.put(new Object) 37 | 38 | assert(firstId.equals("1")) 39 | assert(secondId.equals("2")) 40 | assert(thirdId.equals("1")) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark.UnitTest/TestUtils/XunitConsoleOutHelper.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | using System.IO; 7 | using Xunit.Abstractions; 8 | 9 | namespace Microsoft.Spark.UnitTest.TestUtils 10 | { 11 | // Tests can subclass this to get Console output to display when using 12 | // xUnit testing framework. 
13 | // Workaround found at https://github.com/microsoft/vstest/issues/799 14 | public class XunitConsoleOutHelper : IDisposable 15 | { 16 | private readonly ITestOutputHelper _output; 17 | private readonly TextWriter _originalOut; 18 | private readonly TextWriter _textWriter; 19 | 20 | public XunitConsoleOutHelper(ITestOutputHelper output) 21 | { 22 | _output = output; 23 | _originalOut = Console.Out; 24 | _textWriter = new StringWriter(); 25 | Console.SetOut(_textWriter); 26 | } 27 | 28 | public void Dispose() 29 | { 30 | _output.WriteLine(_textWriter.ToString()); 31 | Console.SetOut(_originalOut); 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Interop/Ipc/ForeachBatchCallbackHandler.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using System; 6 | using System.IO; 7 | using Microsoft.Spark.Sql; 8 | using Microsoft.Spark.Sql.Streaming; 9 | 10 | namespace Microsoft.Spark.Interop.Ipc 11 | { 12 | /// <summary> 13 | /// <see cref="DataStreamWriter.ForeachBatch(Action{DataFrame, long})"/> callback handler. 14 | /// </summary> 15 | internal sealed class ForeachBatchCallbackHandler : ICallbackHandler 16 | { 17 | private readonly IJvmBridge _jvm; 18 | 19 | private readonly Action<DataFrame, long> _func; 20 | 21 | internal ForeachBatchCallbackHandler(IJvmBridge jvm, Action<DataFrame, long> func) 22 | { 23 | _jvm = jvm; 24 | _func = func; 25 | } 26 | 27 | public void Run(Stream inputStream) 28 | { 29 | var batchDf = 30 | new DataFrame(new JvmObjectReference(SerDe.ReadString(inputStream), _jvm)); 31 | long batchId = SerDe.ReadInt64(inputStream); 32 | 33 | _func(batchDf, batchId); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /docs/release-notes/0.6/release-0.6.md: -------------------------------------------------------------------------------- 1 | # .NET for Apache Spark 0.6 Release Notes 2 | 3 | ### New Features and Improvements 4 | 5 | * Support for v0.4.0 `DeltaTable` APIs in [Delta Lake](https://github.com/delta-io/delta) ([#297](https://github.com/dotnet/spark/pull/297)) 6 | 7 | ### Breaking Changes 8 | * None 9 | 10 | ### Supported Spark Versions 11 | 12 | The following table outlines the supported Spark versions along with the microsoft-spark JAR to use with each: 13 |
| Spark Version | microsoft-spark JAR |
| :------------ | :------------------------------ |
| 2.3.* | microsoft-spark-2.3.x-0.6.0.jar |
| 2.4.0 | microsoft-spark-2.4.x-0.6.0.jar |
| 2.4.1 | microsoft-spark-2.4.x-0.6.0.jar |
| 2.4.3 | microsoft-spark-2.4.x-0.6.0.jar |
| 2.4.4 | microsoft-spark-2.4.x-0.6.0.jar |
| 2.4.2 | Not supported |
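For orientation, the JAR listed above is the one passed to `spark-submit` when launching a .NET for Apache Spark application against the matching Spark version. A minimal sketch is shown below; the master URL and the application assembly name `MySparkApp.dll` are placeholders and not part of this release:

```bash
# Illustrative only: submit a .NET for Apache Spark app against Spark 2.4.x
spark-submit \
  --class org.apache.spark.deploy.dotnet.DotnetRunner \
  --master local \
  microsoft-spark-2.4.x-0.6.0.jar \
  dotnet MySparkApp.dll
```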
45 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-2-4/src/test/scala/org/apache/spark/api/dotnet/DotnetBackendTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.api.dotnet 8 | 9 | import org.junit.Assert._ 10 | import org.junit.function.ThrowingRunnable 11 | import org.junit.{After, Before, Test} 12 | 13 | import java.net.InetAddress 14 | 15 | @Test 16 | class DotnetBackendTest { 17 | private var backend: DotnetBackend = _ 18 | 19 | @Before 20 | def before(): Unit = { 21 | backend = new DotnetBackend 22 | } 23 | 24 | @After 25 | def after(): Unit = { 26 | backend.close() 27 | } 28 | 29 | @Test 30 | def shouldNotResetCallbackClient(): Unit = { 31 | // Specifying port = 0 to select port dynamically. 32 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 33 | 34 | assertTrue(backend.callbackClient.isDefined) 35 | assertThrows( 36 | classOf[Exception], 37 | new ThrowingRunnable { 38 | override def run(): Unit = { 39 | backend.setCallbackClient(InetAddress.getLoopbackAddress.toString, port = 0) 40 | } 41 | }) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/csharp/Microsoft.Spark/Interop/Internal/Dotnet/ArrayExtensions.cs: -------------------------------------------------------------------------------- 1 | // Licensed to the .NET Foundation under one or more agreements. 2 | // The .NET Foundation licenses this file to you under the MIT license. 3 | // See the LICENSE file in the project root for more information. 4 | 5 | using Microsoft.Spark.Interop; 6 | using Microsoft.Spark.Interop.Internal.Java.Util; 7 | 8 | namespace System 9 | { 10 | /// <summary> 11 | /// ArrayExtensions host custom extension methods for the 12 | /// dotnet base class array T[]. 13 | /// </summary> 14 | public static class ArrayExtensions 15 | { 16 | /// <summary> 17 | /// A custom extension method that helps transform from dotnet 18 | /// array of type T to java.util.ArrayList. 19 | /// </summary> 20 | /// <param name="array">an array instance</param> 21 | /// <typeparam name="T">elements type of param array</typeparam> 22 | /// <returns></returns> 23 | internal static ArrayList ToJavaArrayList<T>(this T[] array) 24 | { 25 | var arrayList = new ArrayList(SparkEnvironment.JvmBridge); 26 | foreach (T item in array) 27 | { 28 | arrayList.Add(item); 29 | } 30 | return arrayList; 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]: " 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | 15 | Steps to reproduce the behavior: 16 | 1. Go to '...' 17 | 2. Click on '....' 18 | 3. Scroll down to '....' 19 | 4.
See error 20 | 21 | If you think the bug depends on external factors (e.g., dataset), please provide us with a minimal reproducible example that consists of the following items: 22 | 23 | - a minimal dataset, necessary to reproduce the error 24 | - the minimal runnable code necessary to reproduce the error, which can be run on the given dataset 25 | - the necessary information on any used packages, .NET runtime version, and system it is run on 26 | - in the case of random processes, a seed for reproducibility 27 | 28 | **Expected behavior** 29 | A clear and concise description of what you expected to happen. 30 | 31 | **Screenshots** 32 | If applicable, add screenshots to help explain your problem. 33 | 34 | **Desktop (please complete the following information):** 35 | - OS: [e.g. iOS] 36 | - Browser [e.g. chrome, safari] 37 | - Version [e.g. 22] 38 | 39 | **Additional context** 40 | Add any other context about the problem here. 41 | -------------------------------------------------------------------------------- /eng/AfterSolutionBuild.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | <_PublishProject Include="$(RepoRoot)src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj" /> 5 | 6 | 7 | 8 | <_PublishTarget Include="FullFramework" Framework="net48" RuntimeId="win-x64" /> 9 | <_PublishTarget Include="WindowsCore" Framework="net8.0" RuntimeId="win-x64" /> 10 | <_PublishTarget Include="LinuxCore" Framework="net8.0" RuntimeId="linux-x64" /> 11 | <_PublishTarget Include="MacOSCore" Framework="net8.0" RuntimeId="osx-x64" /> 12 | 13 | 14 | 17 | 18 | 20 | 21 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /eng/common/templates/jobs/codeql-build.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | # See schema documentation in /Documentation/AzureDevOps/TemplateSchema.md 3 | continueOnError: false 4 | # Required: A collection of jobs to run - https://docs.microsoft.com/en-us/azure/devops/pipelines/yaml-schema?view=vsts&tabs=schema#job 5 | jobs: [] 6 | # Optional: if specified, restore and use this version of Guardian instead of the default. 7 | overrideGuardianVersion: '' 8 | 9 | jobs: 10 | - template: /eng/common/templates/jobs/jobs.yml 11 | parameters: 12 | enableMicrobuild: false 13 | enablePublishBuildArtifacts: false 14 | enablePublishTestResults: false 15 | enablePublishBuildAssets: false 16 | enablePublishUsingPipelines: false 17 | enableTelemetry: true 18 | 19 | variables: 20 | - group: Publish-Build-Assets 21 | # The Guardian version specified in 'eng/common/sdl/packages.config'. This value must be kept in 22 | # sync with the packages.config file. 
23 | - name: DefaultGuardianVersion 24 | value: 0.109.0 25 | - name: GuardianPackagesConfigFile 26 | value: $(Build.SourcesDirectory)\eng\common\sdl\packages.config 27 | - name: GuardianVersion 28 | value: ${{ coalesce(parameters.overrideGuardianVersion, '$(DefaultGuardianVersion)') }} 29 | 30 | jobs: ${{ parameters.jobs }} 31 | 32 | -------------------------------------------------------------------------------- /eng/common/templates/steps/retain-build.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | # Optional azure devops PAT with build execute permissions for the build's organization, 3 | # only needed if the build that should be retained ran on a different organization than 4 | # the pipeline where this template is executing from 5 | Token: '' 6 | # Optional BuildId to retain, defaults to the current running build 7 | BuildId: '' 8 | # Azure devops Organization URI for the build in the https://dev.azure.com/ format. 9 | # Defaults to the organization the current pipeline is running on 10 | AzdoOrgUri: '$(System.CollectionUri)' 11 | # Azure devops project for the build. Defaults to the project the current pipeline is running on 12 | AzdoProject: '$(System.TeamProject)' 13 | 14 | steps: 15 | - task: powershell@2 16 | inputs: 17 | targetType: 'filePath' 18 | filePath: eng/common/retain-build.ps1 19 | pwsh: true 20 | arguments: > 21 | -AzdoOrgUri: ${{parameters.AzdoOrgUri}} 22 | -AzdoProject ${{parameters.AzdoProject}} 23 | -Token ${{coalesce(parameters.Token, '$env:SYSTEM_ACCESSTOKEN') }} 24 | -BuildId ${{coalesce(parameters.BuildId, '$env:BUILD_ID')}} 25 | displayName: Enable permanent build retention 26 | env: 27 | SYSTEM_ACCESSTOKEN: $(System.AccessToken) 28 | BUILD_ID: $(Build.BuildId) -------------------------------------------------------------------------------- /eng/common/templates-official/steps/retain-build.yml: -------------------------------------------------------------------------------- 1 | parameters: 2 | # Optional azure devops PAT with build execute permissions for the build's organization, 3 | # only needed if the build that should be retained ran on a different organization than 4 | # the pipeline where this template is executing from 5 | Token: '' 6 | # Optional BuildId to retain, defaults to the current running build 7 | BuildId: '' 8 | # Azure devops Organization URI for the build in the https://dev.azure.com/ format. 9 | # Defaults to the organization the current pipeline is running on 10 | AzdoOrgUri: '$(System.CollectionUri)' 11 | # Azure devops project for the build. 
Defaults to the project the current pipeline is running on 12 | AzdoProject: '$(System.TeamProject)' 13 | 14 | steps: 15 | - task: powershell@2 16 | inputs: 17 | targetType: 'filePath' 18 | filePath: eng/common/retain-build.ps1 19 | pwsh: true 20 | arguments: > 21 | -AzdoOrgUri: ${{parameters.AzdoOrgUri}} 22 | -AzdoProject ${{parameters.AzdoProject}} 23 | -Token ${{coalesce(parameters.Token, '$env:SYSTEM_ACCESSTOKEN') }} 24 | -BuildId ${{coalesce(parameters.BuildId, '$env:BUILD_ID')}} 25 | displayName: Enable permanent build retention 26 | env: 27 | SYSTEM_ACCESSTOKEN: $(System.AccessToken) 28 | BUILD_ID: $(Build.BuildId) -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-0/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.api.dotnet.CallbackClient 10 | import org.apache.spark.internal.Logging 11 | import org.apache.spark.sql.{DataFrame, Row} 12 | import org.apache.spark.sql.streaming.DataStreamWriter 13 | 14 | class DotnetForeachBatchFunction(callbackClient: CallbackClient, callbackId: Int) extends Logging { 15 | def call(batchDF: DataFrame, batchId: Long): Unit = 16 | callbackClient.send( 17 | callbackId, 18 | (dos, serDe) => { 19 | serDe.writeJObj(dos, batchDF) 20 | serDe.writeLong(dos, batchId) 21 | }) 22 | } 23 | 24 | object DotnetForeachBatchHelper { 25 | def callForeachBatch(client: Option[CallbackClient], dsw: DataStreamWriter[Row], callbackId: Int): Unit = { 26 | val dotnetForeachFunc = client match { 27 | case Some(value) => new DotnetForeachBatchFunction(value, callbackId) 28 | case None => throw new Exception("CallbackClient is null.") 29 | } 30 | 31 | dsw.foreachBatch(dotnetForeachFunc.call _) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-1/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.api.dotnet.CallbackClient 10 | import org.apache.spark.internal.Logging 11 | import org.apache.spark.sql.{DataFrame, Row} 12 | import org.apache.spark.sql.streaming.DataStreamWriter 13 | 14 | class DotnetForeachBatchFunction(callbackClient: CallbackClient, callbackId: Int) extends Logging { 15 | def call(batchDF: DataFrame, batchId: Long): Unit = 16 | callbackClient.send( 17 | callbackId, 18 | (dos, serDe) => { 19 | serDe.writeJObj(dos, batchDF) 20 | serDe.writeLong(dos, batchId) 21 | }) 22 | } 23 | 24 | object DotnetForeachBatchHelper { 25 | def callForeachBatch(client: Option[CallbackClient], dsw: DataStreamWriter[Row], callbackId: Int): Unit = { 26 | val dotnetForeachFunc = client match { 27 | case Some(value) => new DotnetForeachBatchFunction(value, callbackId) 28 | case None => throw new Exception("CallbackClient is null.") 29 | } 30 | 31 | dsw.foreachBatch(dotnetForeachFunc.call _) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-2/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.api.dotnet.CallbackClient 10 | import org.apache.spark.internal.Logging 11 | import org.apache.spark.sql.{DataFrame, Row} 12 | import org.apache.spark.sql.streaming.DataStreamWriter 13 | 14 | class DotnetForeachBatchFunction(callbackClient: CallbackClient, callbackId: Int) extends Logging { 15 | def call(batchDF: DataFrame, batchId: Long): Unit = 16 | callbackClient.send( 17 | callbackId, 18 | (dos, serDe) => { 19 | serDe.writeJObj(dos, batchDF) 20 | serDe.writeLong(dos, batchId) 21 | }) 22 | } 23 | 24 | object DotnetForeachBatchHelper { 25 | def callForeachBatch(client: Option[CallbackClient], dsw: DataStreamWriter[Row], callbackId: Int): Unit = { 26 | val dotnetForeachFunc = client match { 27 | case Some(value) => new DotnetForeachBatchFunction(value, callbackId) 28 | case None => throw new Exception("CallbackClient is null.") 29 | } 30 | 31 | dsw.foreachBatch(dotnetForeachFunc.call _) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/scala/microsoft-spark-3-3/src/main/scala/org/apache/spark/sql/api/dotnet/DotnetForeachBatch.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the .NET Foundation under one or more agreements. 3 | * The .NET Foundation licenses this file to you under the MIT license. 4 | * See the LICENSE file in the project root for more information. 
5 | */ 6 | 7 | package org.apache.spark.sql.api.dotnet 8 | 9 | import org.apache.spark.api.dotnet.CallbackClient 10 | import org.apache.spark.internal.Logging 11 | import org.apache.spark.sql.{DataFrame, Row} 12 | import org.apache.spark.sql.streaming.DataStreamWriter 13 | 14 | class DotnetForeachBatchFunction(callbackClient: CallbackClient, callbackId: Int) extends Logging { 15 | def call(batchDF: DataFrame, batchId: Long): Unit = 16 | callbackClient.send( 17 | callbackId, 18 | (dos, serDe) => { 19 | serDe.writeJObj(dos, batchDF) 20 | serDe.writeLong(dos, batchId) 21 | }) 22 | } 23 | 24 | object DotnetForeachBatchHelper { 25 | def callForeachBatch(client: Option[CallbackClient], dsw: DataStreamWriter[Row], callbackId: Int): Unit = { 26 | val dotnetForeachFunc = client match { 27 | case Some(value) => new DotnetForeachBatchFunction(value, callbackId) 28 | case None => throw new Exception("CallbackClient is null.") 29 | } 30 | 31 | dsw.foreachBatch(dotnetForeachFunc.call _) 32 | } 33 | } 34 | --------------------------------------------------------------------------------