├── tests ├── integration │ ├── requirements.txt │ ├── spark-apps │ │ ├── wheeljobs │ │ │ └── abfssInAbfssOut │ │ │ │ ├── abfssintest │ │ │ │ ├── __init__.py │ │ │ │ └── main.py │ │ │ │ └── setup.py │ │ ├── jarjobs │ │ │ ├── abfssInAbfssOut │ │ │ │ ├── gradle │ │ │ │ │ └── wrapper │ │ │ │ │ │ ├── gradle-wrapper.jar │ │ │ │ │ │ └── gradle-wrapper.properties │ │ │ │ ├── settings.gradle │ │ │ │ └── app │ │ │ │ │ ├── src │ │ │ │ │ ├── test │ │ │ │ │ │ └── java │ │ │ │ │ │ │ └── SparkApp │ │ │ │ │ │ │ └── Basic │ │ │ │ │ │ │ └── AppTest.java │ │ │ │ │ └── main │ │ │ │ │ │ └── java │ │ │ │ │ │ └── SparkApp │ │ │ │ │ │ └── Basic │ │ │ │ │ │ └── App.java │ │ │ │ │ └── build.gradle │ │ │ └── readSampleWriteSample │ │ │ │ ├── gradle │ │ │ │ └── wrapper │ │ │ │ │ ├── gradle-wrapper.jar │ │ │ │ │ └── gradle-wrapper.properties │ │ │ │ ├── settings.gradle │ │ │ │ └── app │ │ │ │ ├── src │ │ │ │ ├── test │ │ │ │ │ └── java │ │ │ │ │ │ └── SparkApp │ │ │ │ │ │ └── ReadWrite │ │ │ │ │ │ └── AppTest.java │ │ │ │ └── main │ │ │ │ │ └── java │ │ │ │ │ └── SparkApp │ │ │ │ │ └── ReadWrite │ │ │ │ │ └── App.java │ │ │ │ └── build.gradle │ │ ├── notebooks │ │ │ ├── nested-parent.scala │ │ │ ├── hive-in-hive-out-insert.py │ │ │ ├── hive+mgd+not+default-in-hive+mgd+not+default-out-insert.py │ │ │ ├── hive+mnt-in-hive+mnt-out-insert.py │ │ │ ├── abfss-in-hive+saveAsTable-out.scala │ │ │ ├── postgres-in-postgres-out.py │ │ │ ├── name-with-periods.scala │ │ │ ├── mysql-in-mysql-out.py │ │ │ ├── hive+abfss-in-hive+abfss-out-insert.py │ │ │ ├── wasbs-in-wasbs-out.scala │ │ │ ├── mnt-in-mnt-out.scala │ │ │ ├── nested-child.scala │ │ │ ├── delta-in-delta-out-abfss.scala │ │ │ ├── abfss-in-abfss-out-root.scala │ │ │ ├── abfss-in-abfss-out.scala │ │ │ ├── call-via-adf-spark2.scala │ │ │ ├── call-via-adf-spark3.scala │ │ │ ├── delta-in-delta-out-mnt.scala │ │ │ ├── delta-in-delta-out-fs.scala │ │ │ ├── abfss-in-hive+notmgd+saveAsTable-out.scala │ │ │ ├── populate-data-kusto.scala │ │ │ ├── spark-sql-table-in-abfss-out.scala │ │ │ ├── kusto-in-wasbs-out.scala │ │ │ ├── abfss-in-abfss-out-oauth.scala │ │ │ └── intermix-languages.scala │ │ ├── pythonscript │ │ │ ├── pythonscript.py │ │ │ └── pythonscript.json │ │ ├── sparksubmit │ │ │ └── sparksubmit.json │ │ └── README.md │ ├── jobdefs-inactive │ │ ├── sparksubmit-test-expectations.json │ │ ├── spark2-tests-expectations.json │ │ ├── spark2-tests-def.json │ │ └── sparksubmit-test-def.json │ ├── jobdefs │ │ ├── jarjob-test-expectations.json │ │ ├── pythonwheel-test-expectations.json │ │ ├── pythonscript-test-expectations.json │ │ ├── pythonwheel-test-def.json │ │ ├── jarjob-test-def.json │ │ ├── pythonscript-test-def.json │ │ └── hive3-tests-expectations.json │ ├── search.py │ └── README.md ├── environment │ ├── datasets │ │ ├── azsql.sql │ │ └── sqlpool.sql │ ├── requirements.txt │ ├── sources │ │ ├── sql.bicep │ │ ├── sqlserver.bicep │ │ ├── adx.bicep │ │ ├── adlsg2.bicep │ │ ├── synapse.bicep │ │ ├── mysql.bicep │ │ └── postgres.bicep │ └── dbfs │ │ ├── mounts.py │ │ └── create-job.py └── deployment │ └── test_arm_mapping_matches_json.py ├── function-app └── adb-to-purview │ ├── omnisharp.json │ ├── tests │ ├── unit-tests │ │ ├── settings.json │ │ ├── Function.Domain │ │ │ ├── Helpers │ │ │ │ ├── Common │ │ │ │ │ └── AutoMoqDataAttribute.cs │ │ │ │ └── Parser │ │ │ │ │ └── UnitTestData.cs │ │ │ └── Services │ │ │ │ └── OlFilterTests.cs │ │ └── unit-tests.csproj │ └── tools │ │ ├── QualifiedNameConfigTester │ │ ├── configReadMe.txt │ │ └── QualifiedNameConfigTester.csproj │ │ 
├── localsettingsdutils-del.py │ │ └── Dev_Utilities.py │ ├── .vscode │ ├── extensions.json │ ├── settings.json │ ├── launch.json │ └── tasks.json │ └── src │ ├── Function.Domain │ ├── Models │ │ └── Parser │ │ │ ├── OpenLineage │ │ │ ├── Job.cs │ │ │ ├── Run.cs │ │ │ ├── IInputsOutputs.cs │ │ │ ├── LogicalPlan.cs │ │ │ ├── MountPoint.cs │ │ │ ├── Inputs.cs │ │ │ ├── EnvironmentPropsParent.cs │ │ │ ├── SparkVer.cs │ │ │ ├── Outputs.cs │ │ │ ├── Facets.cs │ │ │ ├── Event.cs │ │ │ ├── EnrichedEvent.cs │ │ │ ├── OutputFacets.cs │ │ │ ├── Project.cs │ │ │ ├── Plan.cs │ │ │ ├── ColumnLineageFacets.cs │ │ │ └── EnvironmentProps.cs │ │ │ ├── Settings │ │ │ ├── OlToPurviewColumnMapping.cs │ │ │ ├── ColParserSettings.cs │ │ │ ├── ParserSettings.cs │ │ │ ├── OlTableEntity.cs │ │ │ ├── PurviewColumnMapping.cs │ │ │ ├── ParserCondition.cs │ │ │ └── NewParserSettings.cs │ │ │ ├── Adb │ │ │ ├── ClusterInstance.cs │ │ │ ├── JobType.cs │ │ │ ├── SparkPythonTask.cs │ │ │ ├── NotebookTask.cs │ │ │ ├── SparkJarTask.cs │ │ │ ├── PythonWheelTask.cs │ │ │ └── AdbRoot.cs │ │ │ └── Purview │ │ │ ├── RelationshipAttribute.cs │ │ │ ├── uniqueAttributes.cs │ │ │ ├── IDatabricksTask.cs │ │ │ ├── DatabricksTaskRelationshipAttributes.cs │ │ │ ├── DatabricksJobRelationshipAttributes.cs │ │ │ ├── DatabricksProcessRelationshipAttributes.cs │ │ │ ├── DatabricksNotebookRelationshipAttributes.cs │ │ │ ├── BaseAttributes.cs │ │ │ ├── InputOutput.cs │ │ │ ├── AtlasEntityWithExtInfo.cs │ │ │ ├── DatabricksNotebookTaskRelationshipAttributes.cs │ │ │ ├── DatabricksPythonTaskAttributes.cs │ │ │ ├── IDatabricksJobTaskAttributes.cs │ │ │ ├── DatabricksNotebookTaskAttributes.cs │ │ │ ├── DatabricksJob.cs │ │ │ ├── DatabricksJobAttributes.cs │ │ │ ├── DatabricksWorkspace.cs │ │ │ ├── PurviewIdentifier.cs │ │ │ ├── DatabricksNotebook.cs │ │ │ ├── DatabricksSparkJarTask.cs │ │ │ ├── DatabricksPythonTask.cs │ │ │ ├── DatabricksNotebookAttributes.cs │ │ │ ├── DatabricksSparkJarTaskAttributes.cs │ │ │ ├── DatabricksPythonWheelTask.cs │ │ │ ├── DatabricksNotebookTask.cs │ │ │ ├── DatabricksPythonWheelTaskAttributes.cs │ │ │ ├── DatabricksJobTaskAtrributes.cs │ │ │ ├── DatabricksProcess.cs │ │ │ ├── DatabricksProcessAttributes.cs │ │ │ ├── Asset.cs │ │ │ └── ColumnLevelAttributes.cs │ ├── Services │ │ ├── IOlFilter.cs │ │ ├── IOlConsolodateEnrich.cs │ │ ├── IOlToPurviewParsingService.cs │ │ └── IPurviewIngestion.cs │ ├── Helpers │ │ ├── parser │ │ │ ├── IQnParser.cs │ │ │ ├── IColParser.cs │ │ │ ├── IDatabricksToPurviewParser.cs │ │ │ └── Exceptions.cs │ │ ├── OlProcessing │ │ │ ├── IValidateOlEvent.cs │ │ │ ├── IOlMessageEnrichment.cs │ │ │ └── IOlMessageConsolodation.cs │ │ ├── IHttpHelper.cs │ │ ├── HttpHelper.cs │ │ └── IPurviewClientHelper.cs │ ├── Providers │ │ ├── IAdbClientProvider.cs │ │ └── IHttpClientProvider.cs │ └── Constants │ │ └── Constants.cs │ ├── host.json │ ├── Program.cs │ └── adb-to-purview.csproj ├── .vscode └── settings.json ├── assets ├── img │ ├── deploy │ │ ├── ShellIcon.png │ │ ├── SelectBash.png │ │ ├── EditSettings.png │ │ ├── FunctionKeys.png │ │ ├── GetAdbEndpoint.png │ │ ├── UploadFilesPs.png │ │ ├── SelectPowerShell.png │ │ ├── UploadFilesBash.png │ │ ├── CloudShellConfirm.png │ │ └── BashCloudShellConfirm.png │ ├── readme │ │ ├── ShellIcon.png │ │ ├── lineage.png │ │ ├── EAE_Header.png │ │ ├── SelectBash.png │ │ ├── spark_plan.png │ │ ├── UploadFilesPs.png │ │ ├── browse_assets.png │ │ ├── lineage_view.png │ │ ├── SelectPowerShell.png │ │ ├── UploadFilesBash.png │ │ ├── CloudShellConfirm.png │ │ ├── 
FunctionAppConfig.png │ │ ├── BashCloudShellConfirm.png │ │ ├── databricks_task_related.png │ │ └── updatePurviewAccountName.png │ ├── estimator │ │ └── spark-jobs-in-notebook.png │ ├── extend-source │ │ ├── oltopurviewmappings.png │ │ └── QualifiedName.svg │ └── private-endpoint │ │ └── privateendpoint.png └── estimator │ ├── cost-estimator.xlsx │ └── README.md ├── deployment ├── infra │ ├── exampleInputB.csv │ ├── exampleInputA.csv │ ├── settings.sh │ ├── abfss-in-abfss-out-olsample.scala │ └── Custom_Types.json └── util │ ├── mappings-remove-spaces.py │ ├── README.md │ └── mappings-update-arm.py ├── adb-to-purview.code-workspace ├── docs ├── mappings │ ├── snowflake.json │ ├── adlsg1.json │ ├── README.md │ └── az-sql.json ├── README.md ├── powershell-alternatives.md └── release-checklist.md ├── CODE_OF_CONDUCT.md ├── .github └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── LICENSE.txt ├── PRIVACY.md └── CONTRIBUTING.md /tests/integration/requirements.txt: -------------------------------------------------------------------------------- 1 | pyapacheatlas==0.12.0 2 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/abfssintest/__init__.py: -------------------------------------------------------------------------------- 1 | from .main import runapp -------------------------------------------------------------------------------- /tests/integration/jobdefs-inactive/sparksubmit-test-expectations.json: -------------------------------------------------------------------------------- 1 | [ 2 | "SparkApp.ReadWrite.App" 3 | ] -------------------------------------------------------------------------------- /function-app/adb-to-purview/omnisharp.json: -------------------------------------------------------------------------------- 1 | { 2 | "msbuild": { 3 | "useBundledOnly": true 4 | } 5 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "azureFunctions.projectSubpath": "function-app\\adb-to-purview\\src\\bin\\Debug\\net6.0" 3 | } -------------------------------------------------------------------------------- /assets/img/deploy/ShellIcon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/deploy/ShellIcon.png -------------------------------------------------------------------------------- /assets/img/readme/ShellIcon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/ShellIcon.png -------------------------------------------------------------------------------- /assets/img/readme/lineage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/lineage.png -------------------------------------------------------------------------------- /assets/img/deploy/SelectBash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/deploy/SelectBash.png 
-------------------------------------------------------------------------------- /assets/img/readme/EAE_Header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/EAE_Header.png -------------------------------------------------------------------------------- /assets/img/readme/SelectBash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/SelectBash.png -------------------------------------------------------------------------------- /assets/img/readme/spark_plan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/spark_plan.png -------------------------------------------------------------------------------- /function-app/adb-to-purview/tests/unit-tests/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "dotnet-test-explorer.testProjectPath": "**/*tests.@(csproj|vbproj|fsproj)" 3 | } -------------------------------------------------------------------------------- /assets/estimator/cost-estimator.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/estimator/cost-estimator.xlsx -------------------------------------------------------------------------------- /assets/img/deploy/EditSettings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/deploy/EditSettings.png -------------------------------------------------------------------------------- /assets/img/deploy/FunctionKeys.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/deploy/FunctionKeys.png -------------------------------------------------------------------------------- /assets/img/deploy/GetAdbEndpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/deploy/GetAdbEndpoint.png -------------------------------------------------------------------------------- /assets/img/deploy/UploadFilesPs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/deploy/UploadFilesPs.png -------------------------------------------------------------------------------- /assets/img/readme/UploadFilesPs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/UploadFilesPs.png -------------------------------------------------------------------------------- /assets/img/readme/browse_assets.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/browse_assets.png -------------------------------------------------------------------------------- /assets/img/readme/lineage_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/lineage_view.png -------------------------------------------------------------------------------- /assets/img/deploy/SelectPowerShell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/deploy/SelectPowerShell.png -------------------------------------------------------------------------------- /assets/img/deploy/UploadFilesBash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/deploy/UploadFilesBash.png -------------------------------------------------------------------------------- /assets/img/readme/SelectPowerShell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/SelectPowerShell.png -------------------------------------------------------------------------------- /assets/img/readme/UploadFilesBash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/UploadFilesBash.png -------------------------------------------------------------------------------- /assets/img/deploy/CloudShellConfirm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/deploy/CloudShellConfirm.png -------------------------------------------------------------------------------- /assets/img/readme/CloudShellConfirm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/CloudShellConfirm.png -------------------------------------------------------------------------------- /assets/img/readme/FunctionAppConfig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/FunctionAppConfig.png -------------------------------------------------------------------------------- /assets/img/deploy/BashCloudShellConfirm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/deploy/BashCloudShellConfirm.png -------------------------------------------------------------------------------- /assets/img/readme/BashCloudShellConfirm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/BashCloudShellConfirm.png -------------------------------------------------------------------------------- 
/assets/img/readme/databricks_task_related.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/databricks_task_related.png -------------------------------------------------------------------------------- /assets/img/readme/updatePurviewAccountName.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/readme/updatePurviewAccountName.png -------------------------------------------------------------------------------- /assets/img/estimator/spark-jobs-in-notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/estimator/spark-jobs-in-notebook.png -------------------------------------------------------------------------------- /assets/img/extend-source/oltopurviewmappings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/extend-source/oltopurviewmappings.png -------------------------------------------------------------------------------- /assets/img/private-endpoint/privateendpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/assets/img/private-endpoint/privateendpoint.png -------------------------------------------------------------------------------- /deployment/infra/exampleInputB.csv: -------------------------------------------------------------------------------- 1 | "id","city","stateAbbreviation" 2 | 1,"Seattle","WA" 3 | 2,"Chicago","IL" 4 | 3,"Atlanta","GA" 5 | 4,"New York City","NY" 6 | 5,"Boston","MA" 7 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/tests/tools/QualifiedNameConfigTester/configReadMe.txt: -------------------------------------------------------------------------------- 1 | { "prefix", "nameSpcConParts", "nameSpcBodyParts", "nameSpcNameVals", "nameGroups" } 2 | 3 | comparison '=','!=','>','<','contains' 4 | -------------------------------------------------------------------------------- /deployment/infra/exampleInputA.csv: -------------------------------------------------------------------------------- 1 | "id","postalCode","streetAddress" 2 | 1,"11111","123 Fake St" 3 | 2,"55555","456 Not Real Ave" 4 | 3,"44444","789 Imaginary Pl" 5 | 4,"33333","1011 Fairy Tale Ln" 6 | 5,"22222","1213 No Such Pl" 7 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "ms-azuretools.vscode-azurefunctions", 4 | "ms-dotnettools.csharp", 5 | "formulahendry.dotnet-test-explorer" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/abfssInAbfssOut/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/tests/integration/spark-apps/jarjobs/abfssInAbfssOut/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/readSampleWriteSample/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/HEAD/tests/integration/spark-apps/jarjobs/readSampleWriteSample/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/abfssInAbfssOut/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.9.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/readSampleWriteSample/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.9.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /adb-to-purview.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 5 | }, 6 | { 7 | "path": "function-app\\adb-to-purview" 8 | } 9 | ], 10 | "settings": { 11 | "debug.internalConsoleOptions": "neverOpen", 12 | "dotnet-test-explorer.testProjectPath": "**/*tests.@(csproj|vbproj|fsproj)" 13 | } 14 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/nested-parent.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | // MAGIC %run ./nested-child 3 | 4 | // COMMAND ---------- 5 | 6 | outputDf.repartition(1).write.mode("overwrite").format("csv").save(outputRootPath+"/testcase/eight/nested-parent-folder/") 7 | 8 | // COMMAND ---------- 9 | 10 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/Job.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | namespace Function.Domain.Models.OL 5 | { 6 | public class Job 7 | { 8 | public string Namespace = ""; 9 | public string Name = ""; 10 | } 11 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "azureFunctions.deploySubpath": "src/bin/Release/net6.0/publish", 3 | "azureFunctions.projectLanguage": "C#", 4 | "azureFunctions.projectRuntime": "~4", 5 | "azureFunctions.preDeployTask": "publish (functions)", 6 | "dotnet-test-explorer.testProjectPath": "**/*tests.@(csproj|vbproj|fsproj)" 7 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/Run.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | namespace Function.Domain.Models.OL 5 | { 6 | public class Run 7 | { 8 | public string RunId = ""; 9 | public Facets Facets = new Facets(); 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Settings/OlToPurviewColumnMapping.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | 6 | namespace Function.Domain.Models.Settings 7 | { 8 | 9 | public class OlToPurviewColumnMapping 10 | { 11 | 12 | } 13 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Services/IOlFilter.cs: -------------------------------------------------------------------------------- 1 | 2 | // Copyright (c) Microsoft Corporation. 3 | // Licensed under the MIT License. 4 | 5 | namespace Function.Domain.Services 6 | { 7 | public interface IOlFilter 8 | { 9 | bool FilterOlMessage(string strRequest); 10 | string GetJobNamespace(string strRequest); 11 | } 12 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/tests/tools/localsettingsdutils-del.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | # copy file to localsettingsdutils.py and fill in the below values 5 | 6 | PURVIEW_NAME = "" 7 | TENANT_ID = "" 8 | CLIENT_ID = "" 9 | CLIENT_SECRET = "" -------------------------------------------------------------------------------- /tests/integration/jobdefs/jarjob-test-expectations.json: -------------------------------------------------------------------------------- 1 | [ 2 | "databricks://.azuredatabricks.net/jobs/", 3 | "databricks://.azuredatabricks.net/jobs//tasks/JarJob", 4 | "databricks://.azuredatabricks.net/jobs//tasks/JarJob/processes/CA1C8F378EABC4EF08062103C5D51CBE->560CF14B3818EF6B8FF5D0BC6AF7BCE9" 5 | ] 6 | -------------------------------------------------------------------------------- /tests/integration/jobdefs/pythonwheel-test-expectations.json: -------------------------------------------------------------------------------- 1 | [ 2 | "databricks://.azuredatabricks.net/jobs/", 3 | "databricks://.azuredatabricks.net/jobs//tasks/WheelJob", 4 | "databricks://.azuredatabricks.net/jobs//tasks/WheelJob/processes/6438ED307BBA90F1285E1229E67E020B->5560AE0F6CE4403CC559ECF1821CCE47" 5 | ] -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/IInputsOutputs.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | namespace Function.Domain.Models.OL 5 | { 6 | public interface IInputsOutputs 7 | { 8 | public string Name { get; set; } 9 | public string NameSpace {get; set; } 10 | } 11 | 12 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name='abfssintest', 5 | version='0.0.3', 6 | description='Read ABFSS and Write ABFSS', 7 | url='https://github.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator', 8 | packages=['abfssintest'], 9 | author='Microsoft', 10 | zip_safe=False 11 | ) 12 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Helpers/parser/IQnParser.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Function.Domain.Models.Purview; 5 | 6 | namespace Function.Domain.Helpers 7 | { 8 | public interface IQnParser 9 | { 10 | public PurviewIdentifier GetIdentifiers(string nameSpace, string name); 11 | } 12 | } -------------------------------------------------------------------------------- /tests/integration/jobdefs/pythonscript-test-expectations.json: -------------------------------------------------------------------------------- 1 | [ 2 | "databricks://.azuredatabricks.net/jobs/", 3 | "databricks://.azuredatabricks.net/jobs//tasks/PythonScriptJob", 4 | "databricks://.azuredatabricks.net/jobs//tasks/PythonScriptJob/processes/16D109EA9E8BC7329A7365311F917C1F->C862A921EE653ED2F3101026739FB936" 5 | ] -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Adb/ClusterInstance.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 
2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | 6 | namespace Function.Domain.Models.Adb 7 | { 8 | public class ClusterInstance 9 | { 10 | [JsonProperty("cluster_id")] 11 | public string ClusterId = ""; 12 | 13 | } 14 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/IValidateOlEvent.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Threading.Tasks; 5 | using Function.Domain.Models.OL; 6 | 7 | namespace Function.Domain.Helpers.Parser 8 | { 9 | public interface IValidateOlEvent 10 | { 11 | public bool Validate(Event olEvent); 12 | } 13 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/RelationshipAttribute.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class RelationshipAttribute 8 | { 9 | [JsonProperty("qualifiedName")] 10 | public string QualifiedName = ""; 11 | } 12 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/uniqueAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models 6 | { 7 | public class UniqueAttributes 8 | { 9 | [JsonProperty("qualifiedName")] 10 | public string QualifiedName { get; set; } = ""; 11 | 12 | } 13 | 14 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/LogicalPlan.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.OL 8 | { 9 | public class LogicalPlan 10 | { 11 | public List<Plan> plan { get; set; } = new List<Plan>(); 12 | } 13 | } 14 | 15 | 16 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/IDatabricksTask.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public interface IDatabricksTask 8 | { 9 | public string TypeName { get; set; } 10 | public IDatabricksJobTaskAttributes Attributes { get; set; } 11 | } 12 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/IOlMessageEnrichment.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using System.Threading.Tasks; 5 | using Function.Domain.Models.OL; 6 | 7 | namespace Function.Domain.Helpers.Parser 8 | { 9 | public interface IOlMessageEnrichment 10 | { 11 | public Task<EnrichedEvent?> GetEnrichedEvent(Event olEvent); 12 | } 13 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Providers/IAdbClientProvider.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Threading.Tasks; 5 | using Function.Domain.Models.Adb; 6 | 7 | namespace Function.Domain.Providers 8 | { 9 | public interface IAdbClientProvider 10 | { 11 | public Task<AdbRoot?> GetSingleAdbJobAsync(long runId, string adbWorkspaceUrl); 12 | } 13 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/MountPoint.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | 6 | namespace Function.Domain.Models.OL 7 | { 8 | public class MountPoint 9 | { 10 | [JsonProperty("MountPoint")] 11 | public string MountPointName {get; set; } = ""; 12 | public string Source {get; set; } = ""; 13 | } 14 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/Inputs.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | 6 | namespace Function.Domain.Models.OL 7 | { 8 | public class Inputs: IInputsOutputs 9 | { 10 | public string Name { get; set; } = ""; 11 | [JsonProperty("namespace")] 12 | public string NameSpace { get; set; } = ""; 13 | } 14 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/EnvironmentPropsParent.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | 6 | namespace Function.Domain.Models.OL 7 | { 8 | public class EnvironmentPropsParent 9 | { 10 | [JsonProperty("environment-properties")] 11 | public EnvironmentProps EnvironmentProperties = new EnvironmentProps(); 12 | } 13 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksTaskRelationshipAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksTaskRelationshipAttributes 8 | { 9 | [JsonProperty("job")] 10 | public RelationshipAttribute Job = new RelationshipAttribute(); 11 | } 12 | 13 | } -------------------------------------------------------------------------------- /docs/mappings/snowflake.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "snowflake", 3 | "parserConditions": [ 4 | { 5 | "op1": "prefix", 6 | "compare": "=", 7 | "op2": "snowflake" 8 | } 9 | ], 10 | "qualifiedName": "snowflake://{nameSpcBodyParts[0]}/databases/{nameGroups[0].parts[0]}/schemas/{nameGroups[0].parts[1]}/tables/{nameGroups[0].parts[2]}", 11 | "purviewDataType": "snowflake_table", 12 | "purviewPrefix": "https" 13 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Settings/ColParserSettings.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Function.Domain.Models.Purview; 6 | 7 | namespace Function.Domain.Models.Settings 8 | { 9 | public class ColParserSettings 10 | { 11 | public List<OlToPurviewColumnMapping> OlToPurviewMappings = new List<OlToPurviewColumnMapping>(); 12 | } 13 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns -------------------------------------------------------------------------------- /deployment/util/mappings-remove-spaces.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | 5 | if __name__ == "__main__": 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("mappings_json", help="File path of the mappings json") 8 | args, unknown_args = parser.parse_known_args() 9 | 10 | with open(args.mappings_json, 'r') as fp: 11 | mappings = fp.read() 12 | 13 | oneliner = mappings.replace("\n", "").replace(" ", "") 14 | print(oneliner) 15 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksJobRelationshipAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksJobRelationshipAttributes 8 | { 9 | [JsonProperty("workspace")] 10 | public RelationshipAttribute Workspace = new RelationshipAttribute(); 11 | } 12 | 13 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksProcessRelationshipAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksProcessRelationshipAttributes 8 | { 9 | [JsonProperty("task")] 10 | public RelationshipAttribute Task = new RelationshipAttribute(); 11 | } 12 | 13 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Services/IOlConsolodateEnrich.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Threading.Tasks; 5 | using Function.Domain.Models.OL; 6 | 7 | namespace Function.Domain.Services 8 | { 9 | public interface IOlConsolodateEnrich 10 | { 11 | public Task<EnrichedEvent?> ProcessOlMessage(string strEvent); 12 | public string GetJobNamespace(); 13 | } 14 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | } 10 | }, 11 | "extensions": { 12 | "eventHubs": { 13 | "initialOffsetOptions": { 14 | "type": "fromEnd" 15 | } 16 | } 17 | } 18 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Adb/JobType.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Adb 8 | { 9 | public enum JobType 10 | { 11 | InteractiveNotebook, 12 | JobNotebook, 13 | JobPython, 14 | JobWheel, 15 | JobJar, 16 | Unsupported 17 | } 18 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksNotebookRelationshipAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksNotebookRelationshipAttributes 8 | { 9 | [JsonProperty("workspace")] 10 | public RelationshipAttribute Workspace = new RelationshipAttribute(); 11 | } 12 | 13 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/abfssInAbfssOut/settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task.
3 | * 4 | * The settings file is used to specify which projects to include in your build. 5 | * 6 | * Detailed information about configuring a multi-project build in Gradle can be found 7 | * in the user manual at https://docs.gradle.org/6.9.1/userguide/multi_project_builds.html 8 | */ 9 | 10 | rootProject.name = 'SparkApp-Basic' 11 | include('app') 12 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/readSampleWriteSample/settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * The settings file is used to specify which projects to include in your build. 5 | * 6 | * Detailed information about configuring a multi-project build in Gradle can be found 7 | * in the user manual at https://docs.gradle.org/6.9.1/userguide/multi_project_builds.html 8 | */ 9 | 10 | rootProject.name = 'ReadWriteSample' 11 | include('app') 12 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/pythonscript/pythonscript.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import SparkSession 2 | 3 | 4 | print("STARTING") 5 | spark = SparkSession.builder.getOrCreate() 6 | 7 | exampleA = ( 8 | spark.read.format("csv") 9 | .option("header", True) 10 | .option("inferSchema", True) 11 | .load("/mnt/rawdata/testcase/twenty/exampleInputA") 12 | ) 13 | 14 | exampleA.repartition(1).write.mode("overwrite").format("csv").save("/mnt/rawdata/testcase/twenty/output") 15 | print("COMPLETED") 16 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/SparkVer.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | 6 | namespace Function.Domain.Models.OL 7 | { 8 | public class SparkVer 9 | { 10 | [JsonProperty("spark-version")] 11 | public string SparkVersion = ""; 12 | [JsonProperty("openlineage-spark-version")] 13 | public string OpenLineageSparkVersion = ""; 14 | } 15 | 16 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Services/IOlToPurviewParsingService.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Threading.Tasks; 5 | using Function.Domain.Helpers; 6 | using Function.Domain.Models.OL; 7 | 8 | namespace Function.Domain.Services 9 | { 10 | public interface IOlToPurviewParsingService 11 | { 12 | public string? GetPurviewFromOlEvent(EnrichedEvent eventData, IDatabricksToPurviewParser parser); 13 | } 14 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/abfssInAbfssOut/app/src/test/java/SparkApp/Basic/AppTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This Java source file was generated by the Gradle 'init' task. 
3 | */ 4 | package SparkApp.Basic; 5 | 6 | import org.junit.Test; 7 | import static org.junit.Assert.*; 8 | 9 | public class AppTest { 10 | @Test public void testAppHasAGreeting() { 11 | App classUnderTest = new App(); 12 | assertNotNull("app should have a greeting", classUnderTest.getGreeting()); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/BaseAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | public class BaseAttributes 10 | { 11 | [JsonProperty("name")] 12 | public string Name = ""; 13 | [JsonProperty("qualifiedName")] 14 | public string QualifiedName = ""; 15 | } 16 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Settings/ParserSettings.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Function.Domain.Models.Purview; 6 | 7 | namespace Function.Domain.Models.Settings 8 | { 9 | public class ParserSettings 10 | { 11 | public List<OlToPurviewMapping> OlToPurviewMappings = new List<OlToPurviewMapping>(); 12 | public string AdbWorkspaceUrl = ""; 13 | } 14 | 15 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/readSampleWriteSample/app/src/test/java/SparkApp/ReadWrite/AppTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * This Java source file was generated by the Gradle 'init' task. 3 | */ 4 | package SparkApp.ReadWrite; 5 | 6 | import org.junit.Test; 7 | import static org.junit.Assert.*; 8 | 9 | public class AppTest { 10 | @Test public void testAppHasAGreeting() { 11 | App classUnderTest = new App(); 12 | assertNotNull("app should have a greeting", classUnderTest.getGreeting()); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/InputOutput.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class InputOutput 8 | { 9 | [JsonProperty("uniqueAttributes")] 10 | public UniqueAttributes UniqueAttributes { get; set; } = new UniqueAttributes(); 11 | [JsonProperty("typeName")] 12 | public string TypeName = ""; 13 | } 14 | 15 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Helpers/IHttpHelper.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using System.Threading.Tasks; 5 | using Microsoft.Azure.Functions.Worker.Http; 6 | 7 | namespace Function.Domain.Helpers 8 | { 9 | public interface IHttpHelper 10 | { 11 | public Task<HttpResponseData> CreateSuccessfulHttpResponse(HttpRequestData req, object data); 12 | public HttpResponseData CreateServerErrorHttpResponse(HttpRequestData req); 13 | } 14 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Adb/SparkPythonTask.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Adb 8 | { 9 | public class SparkPythonTask 10 | { 11 | [JsonProperty("python_file")] 12 | public string PythonFile = ""; 13 | [JsonProperty("parameters")] 14 | public List<string> Parameters = new List<string>(); 15 | 16 | } 17 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/AtlasEntityWithExtInfo.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | using Newtonsoft.Json.Linq; 6 | using System.Collections.Generic; 7 | 8 | namespace Function.Domain.Models.Purview 9 | { 10 | public class AtlasEntityWithExtInfo 11 | { 12 | public List<Asset>? entities; 13 | public Dictionary<string, Asset>? referredEntities; 14 | public Asset? entity; 15 | } 16 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Services/IPurviewIngestion.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Threading.Tasks; 5 | using Newtonsoft.Json.Linq; 6 | using Function.Domain.Helpers; 7 | 8 | namespace Function.Domain.Services 9 | { 10 | public interface IPurviewIngestion 11 | { 12 | public Task<JArray> SendToPurview(JArray Processes, IColParser colParser); 13 | public Task<JObject> SendToPurview(JObject json, IColParser colParser); 14 | } 15 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Helpers/parser/IColParser.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Function.Domain.Models.OL; 5 | using Function.Domain.Models.Purview; 6 | using System.Collections.Generic; 7 | 8 | namespace Function.Domain.Helpers 9 | { 10 | public interface IColParser 11 | { 12 | public List<ColumnLevelAttributes> GetColIdentifiers(); 13 | public List<ColumnLevelAttributes> GetColIdentifiers(Dictionary<string, string> originalToMatchedFqn); 14 | } 15 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Adb/NotebookTask.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Adb 8 | { 9 | public class NotebookTask 10 | { 11 | [JsonProperty("notebook_path")] 12 | public string NotebookPath = ""; 13 | 14 | [JsonProperty("base_parameters")] 15 | public Dictionary<string, string> BaseParameters = new Dictionary<string, string>(); 16 | 17 | } 18 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/Outputs.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | 6 | namespace Function.Domain.Models.OL 7 | { 8 | public class Outputs : IInputsOutputs 9 | { 10 | [JsonProperty("name")] 11 | public string Name { get; set; } = ""; 12 | [JsonProperty("namespace")] 13 | public string NameSpace { get; set; } = ""; 14 | [JsonProperty("facets")] 15 | public OutputFacets Facets = new OutputFacets(); 16 | } 17 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Helpers/OlProcessing/IOlMessageConsolodation.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Threading.Tasks; 5 | using Function.Domain.Models.OL; 6 | 7 | namespace Function.Domain.Helpers 8 | { 9 | public interface IOlMessageConsolodation 10 | { 11 | 12 | public Task CaptureEnvironmentFromStart(Event olEvent, string jobRunId, EnvironmentPropsParent envParent); 13 | 14 | public Task ConsolodateCompleteEvent(Event olEvent, string jobRunId); 15 | 16 | } 17 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksNotebookTaskRelationshipAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksNotebookTaskRelationshipAttributes 8 | { 9 | [JsonProperty("job")] 10 | public RelationshipAttribute Job = new RelationshipAttribute(); 11 | [JsonProperty("notebook")] 12 | public RelationshipAttribute Notebook = new RelationshipAttribute(); 13 | } 14 | 15 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/hive-in-hive-out-insert.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %sql 3 | # MAGIC CREATE TABLE IF NOT EXISTS default.hiveExampleA000 ( 4 | # MAGIC tableId INT, 5 | # MAGIC x INT 6 | # MAGIC ); 7 | # MAGIC 8 | # MAGIC CREATE TABLE IF NOT EXISTS default.hiveExampleOutput000( 9 | # MAGIC tableId INT, 10 | # MAGIC x INT 11 | # MAGIC ) 12 | 13 | # COMMAND ---------- 14 | 15 | # MAGIC %sql 16 | # MAGIC INSERT INTO default.hiveExampleOutput000 (tableId, x) 17 | # MAGIC SELECT tableId, x 18 | # MAGIC FROM default.hiveExampleA000 19 | 20 | # COMMAND ---------- 21 | 22 | 23 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Adb/SparkJarTask.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Adb 8 | { 9 | public class SparkJarTask 10 | { 11 | [JsonProperty("jar_uri")] 12 | public string JarUri = ""; 13 | [JsonProperty("main_class_name")] 14 | public string MainClassName = ""; 15 | [JsonProperty("parameters")] 16 | public List<string> Parameters = new List<string>(); 17 | 18 | } 19 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksPythonTaskAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | public class DatabricksPythonTaskAttributes : DatabricksJobTaskAttributes, IDatabricksJobTaskAttributes 10 | { 11 | [JsonProperty("pythonFile")] 12 | public string PythonFile = ""; 13 | [JsonProperty("parameters")] 14 | public List<string> Parameters = new List<string>(); 15 | } 16 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/IDatabricksJobTaskAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | public interface IDatabricksJobTaskAttributes 10 | { 11 | public string Name { get; set; } 12 | public string QualifiedName { get; set; } 13 | public long JobId { get; set; } 14 | public string ClusterId { get; set; } 15 | public string SparkVersion { get; set; } 16 | } 17 | 18 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/tests/tools/QualifiedNameConfigTester/QualifiedNameConfigTester.csproj: -------------------------------------------------------------------------------- 1 | <Project Sdk="Microsoft.NET.Sdk"> 2 | 3 | <PropertyGroup> 4 | <OutputType>Exe</OutputType> 5 | <TargetFramework>net6.0</TargetFramework> 6 | <ImplicitUsings>disable</ImplicitUsings> 7 | <Nullable>enable</Nullable> 8 | </PropertyGroup> 9 | 10 | 11 | 12 | 13 | 14 | 15 | </Project> -------------------------------------------------------------------------------- /function-app/adb-to-purview/tests/unit-tests/Function.Domain/Helpers/Common/AutoMoqDataAttribute.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using AutoFixture; 5 | using AutoFixture.AutoMoq; 6 | using AutoFixture.Xunit2; 7 | using System.Diagnostics.CodeAnalysis; 8 | 9 | namespace UnitTests.Function.Domain.Helpers 10 | { 11 | [ExcludeFromCodeCoverage] 12 | public class AutoMoqDataAttribute : AutoDataAttribute 13 | { 14 | public AutoMoqDataAttribute() 15 | : base(() => new Fixture().Customize(new AutoMoqCustomization())) 16 | { 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/Facets.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System; 5 | using Newtonsoft.Json; 6 | using Newtonsoft.Json.Linq; 7 | 8 | namespace Function.Domain.Models.OL 9 | { 10 | public class Facets 11 | { 12 | [JsonProperty("environment-properties")] 13 | public EnvironmentPropsParent? EnvironmentProperties; 14 | [JsonProperty("spark.logicalPlan")] 15 | public JObject SparkLogicalPlan = new JObject(); 16 | [JsonProperty("spark_version")] 17 | public SparkVer SparkVersion = new SparkVer(); 18 | } 19 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksNotebookTaskAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
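// Purview-facing attributes for a notebook task. Note the camelCase JSON names here
// (notebookPath, baseParameters); the corresponding Databricks API model, Models/Adb/NotebookTask,
// uses the snake_case names notebook_path and base_parameters.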
3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | public class DatabricksNotebookTaskAttributes : DatabricksJobTaskAttributes, IDatabricksJobTaskAttributes 10 | { 11 | [JsonProperty("notebookPath")] 12 | public string NotebookPath = ""; 13 | [JsonProperty("baseParameters")] 14 | public Dictionary<string, string> BaseParameters = new Dictionary<string, string>(); 15 | } 16 | } -------------------------------------------------------------------------------- /tests/environment/datasets/azsql.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA nondbo 2 | 3 | CREATE TABLE nondbo.exampleInputC ( 4 | id int 5 | ,cityPopulation int 6 | ) 7 | 8 | CREATE TABLE dbo.exampleInputB ( 9 | id int 10 | ,city varchar(30) 11 | ,stateAbbreviation varchar(2) 12 | ) 13 | 14 | CREATE TABLE dbo.exampleInputA ( 15 | id int 16 | ,postalcode varchar(5) 17 | ,street varchar(50) 18 | ) 19 | 20 | INSERT INTO nondbo.exampleInputC(id, cityPopulation) 21 | VALUES(1, 1000) 22 | 23 | INSERT INTO dbo.exampleInputB(id, city, stateAbbreviation) 24 | VALUES(1, 'Springfield', '??') 25 | 26 | INSERT INTO dbo.exampleInputA(id, postalcode, street) 27 | VALUES(1, '55555', '742 Evergreen Terrace') 28 | -------------------------------------------------------------------------------- /tests/environment/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-core==1.26.1 2 | azure-identity==1.12.0 3 | azure-storage-blob==12.14.1 4 | build==0.9.0 5 | certifi==2022.12.7 6 | cffi==1.15.1 7 | charset-normalizer==2.1.1 8 | colorama==0.4.6 9 | cryptography==39.0.1 10 | idna==3.4 11 | importlib-metadata==5.1.0 12 | isodate==0.6.1 13 | msal==1.20.0 14 | msal-extensions==1.0.0 15 | msrest==0.7.1 16 | oauthlib==3.2.2 17 | packaging==22.0 18 | pep517==0.13.0 19 | portalocker==2.6.0 20 | pycparser==2.21 21 | PyJWT==2.6.0 22 | pywin32==305 23 | requests==2.28.1 24 | requests-oauthlib==1.3.1 25 | six==1.16.0 26 | tomli==2.0.1 27 | typing_extensions==4.4.0 28 | urllib3==1.26.13 29 | zipp==3.11.0 30 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksJob.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksJob 8 | { 9 | [JsonProperty("typeName")] 10 | public string TypeName = "databricks_job"; 11 | [JsonProperty("attributes")] 12 | public DatabricksJobAttributes Attributes = new DatabricksJobAttributes(); 13 | [JsonProperty("relationshipAttributes")] 14 | public DatabricksJobRelationshipAttributes RelationshipAttributes = new DatabricksJobRelationshipAttributes(); 15 | } 16 | } -------------------------------------------------------------------------------- /tests/environment/sources/sql.bicep: -------------------------------------------------------------------------------- 1 | @description('The name of the SQL logical server.') 2 | param serverName string = uniqueString('sql', resourceGroup().id) 3 | 4 | @description('The name of the SQL Database.') 5 | param sqlDBName string = 'SampleDB' 6 | 7 | @description('Location for all resources.') 8 | param location string = resourceGroup().location 9 | 10 | resource sqlServer 'Microsoft.Sql/servers@2022-05-01-preview' existing = { 11 | name: serverName 12 | } 13 | 14 | resource sqlDB 'Microsoft.Sql/servers/databases@2022-05-01-preview' = { 15 | parent: sqlServer 16 | name: sqlDBName 17 | location: location 18 | sku: { 19 | name: 'Basic' 20 | tier: 'Basic' 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksJobAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | public class DatabricksJobAttributes 10 | { 11 | [JsonProperty("name")] 12 | public string Name = ""; 13 | [JsonProperty("qualifiedName")] 14 | public string QualifiedName = ""; 15 | [JsonProperty("jobId")] 16 | public long JobId = 0; 17 | [JsonProperty("creatorUserName")] 18 | public string CreatorUserName = ""; 19 | } 20 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/Event.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using System; 6 | 7 | namespace Function.Domain.Models.OL 8 | { 9 | public class Event 10 | { 11 | public string EventType = ""; 12 | public DateTime EventTime = new DateTime(); 13 | public Run Run = new Run(); 14 | public Job Job = new Job(); 15 | public List<Inputs> Inputs = new List<Inputs>(); 16 | public List<Outputs> Outputs = new List<Outputs>(); 17 | public string Producer = ""; 18 | public string SchemaUrl = ""; 19 | } 20 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksWorkspace.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using Newtonsoft.Json; 5 | using System.Collections.Generic; 6 | namespace Function.Domain.Models.Purview 7 | { 8 | public class DatabricksWorkspace 9 | { 10 | [JsonProperty("typeName")] 11 | public string TypeName = "databricks_workspace"; 12 | [JsonProperty("attributes")] 13 | public BaseAttributes Attributes = new BaseAttributes(); 14 | [JsonProperty("relationshipAttributes")] 15 | public Dictionary RelationshipAttributes = new Dictionary(); 16 | } 17 | } -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Suggest a new feature you'd like to see 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the feature** 11 | A clear and concise description of what the feature should do / would accomplish. 12 | 13 | **Detailed Example** 14 | Please provide a detailed example of what this feature would do. 15 | 16 | **Issues that this feature solves** 17 | List / link any issues that this feature request would solve. 18 | 19 | **Suggested Implementation** 20 | Provide any suggestions on how you expect this to be implemented. 21 | 22 | **Additional context** 23 | Add any other context about the problem here. 24 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/PurviewIdentifier.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class PurviewIdentifier 8 | { 9 | // The QN returned will always be without a trailing slash - the validation code must remove 10 | // trailing slashes before searching Microsoft Purview as Microsoft Purview seems to be inconsistent with regard to 11 | // trailing slashes in QN names and custom sources could have differing rules. 12 | public string QualifiedName = ""; 13 | public string PurviewType = ""; 14 | } 15 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Settings/OlTableEntity.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System; 5 | using Azure.Data.Tables; 6 | 7 | namespace Function.Domain.Models.Settings 8 | { 9 | // Used to save state of OL messages in Azure Table Storage for later message consolidation with incoming messages 10 | public class OlTableEntity : ITableEntity 11 | { 12 | public string EnvFacet = ""; 13 | public string PartitionKey { get; set; } = ""; 14 | public string RowKey { get; set; } = ""; 15 | public DateTimeOffset? Timestamp { get; set; } 16 | public Azure.ETag ETag { get; set; } 17 | } 18 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksNotebook.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksNotebook 8 | { 9 | [JsonProperty("typeName")] 10 | public string TypeName = "databricks_notebook"; 11 | [JsonProperty("attributes")] 12 | public DatabricksNotebookAttributes Attributes = new DatabricksNotebookAttributes(); 13 | [JsonProperty("relationshipAttributes")] 14 | public DatabricksNotebookRelationshipAttributes RelationshipAttributes = new DatabricksNotebookRelationshipAttributes(); 15 | } 16 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksSparkJarTask.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksSparkJarTask 8 | { 9 | [JsonProperty("typeName")] 10 | public string TypeName = "databricks_spark_jar_task"; 11 | [JsonProperty("attributes")] 12 | public DatabricksSparkJarTaskAttributes Attributes = new DatabricksSparkJarTaskAttributes(); 13 | [JsonProperty("relationshipAttributes")] 14 | public DatabricksTaskRelationshipAttributes RelationshipAttributes = new DatabricksTaskRelationshipAttributes(); 15 | } 16 | } -------------------------------------------------------------------------------- /tests/environment/datasets/sqlpool.sql: -------------------------------------------------------------------------------- 1 | CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'xxxx' ; /* Necessary for Synapse External tables */ 2 | CREATE SCHEMA Sales 3 | 4 | CREATE TABLE Sales.Region ( 5 | id int 6 | ,regionId int 7 | ) 8 | 9 | CREATE TABLE dbo.exampleInputB ( 10 | id int 11 | ,city varchar(30) 12 | ,stateAbbreviation varchar(2) 13 | ) 14 | 15 | CREATE TABLE dbo.exampleInputA ( 16 | id int 17 | ,postalcode varchar(5) 18 | ,street varchar(50) 19 | ) 20 | 21 | 22 | 23 | INSERT INTO Sales.Region(id, regionId) 24 | VALUES(1, 1000) 25 | 26 | INSERT INTO dbo.exampleInputB(id, city, stateAbbreviation) 27 | VALUES(1, 'Springfield', '??') 28 | 29 | INSERT INTO dbo.exampleInputA(id, postalcode, street) 30 | VALUES(1, '55555', '742 Evergreen Terrace') 31 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Adb/PythonWheelTask.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
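// Deserializes the python_wheel_task block of a Databricks Jobs API job definition; see
// tests/integration/jobdefs/pythonwheel-test-def.json for a sample payload carrying
// package_name and entry_point.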
3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Adb 8 | { 9 | public class PythonWheelTask 10 | { 11 | [JsonProperty("package_name")] 12 | public string PackageName = ""; 13 | [JsonProperty("entry_point")] 14 | public string EntryPoint = ""; 15 | [JsonProperty("parameters")] 16 | public List<string> Parameters = new List<string>(); 17 | [JsonProperty("named_parameters")] 18 | public Dictionary<string, string> NamedParameters = new Dictionary<string, string>(); 19 | 20 | } 21 | } -------------------------------------------------------------------------------- /tests/integration/jobdefs/pythonwheel-test-def.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "WheelJob", 3 | "email_notifications": { 4 | "no_alert_for_skipped_runs": false 5 | }, 6 | "max_concurrent_runs": 1, 7 | "tasks": [ 8 | { 9 | "task_key": "WheelJob", 10 | "python_wheel_task": { 11 | "package_name": "abfssintest", 12 | "entry_point": "runapp" 13 | }, 14 | "existing_cluster_id": "", 15 | "libraries": [ 16 | { 17 | "whl": "dbfs:/FileStore/testcases/abfssintest-0.0.3-py3-none-any.whl" 18 | } 19 | ], 20 | "timeout_seconds": 0 21 | } 22 | ], 23 | "format": "MULTI_TASK" 24 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksPythonTask.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksPythonTask 8 | { 9 | [JsonProperty("typeName")] 10 | public string TypeName { get; set; } = "databricks_python_task"; 11 | [JsonProperty("attributes")] 12 | public DatabricksPythonTaskAttributes Attributes { get; set; } = new DatabricksPythonTaskAttributes(); 13 | [JsonProperty("relationshipAttributes")] 14 | public DatabricksTaskRelationshipAttributes RelationshipAttributes { get; set; } = new DatabricksTaskRelationshipAttributes(); 15 | } 16 | } -------------------------------------------------------------------------------- /tests/integration/jobdefs/jarjob-test-def.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "JarJob", 3 | "email_notifications": { 4 | "no_alert_for_skipped_runs": false 5 | }, 6 | "max_concurrent_runs": 1, 7 | "tasks": [ 8 | { 9 | "task_key": "JarJob", 10 | "spark_jar_task": { 11 | "jar_uri": "", 12 | "main_class_name": "SparkApp.Basic.App", 13 | "run_as_repl": true 14 | }, 15 | "existing_cluster_id": "", 16 | "libraries": [ 17 | { 18 | "jar": "dbfs:/FileStore/testcases/app.jar" 19 | } 20 | ], 21 | "timeout_seconds": 0 22 | } 23 | ], 24 | "format": "MULTI_TASK" 25 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksNotebookAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | public class DatabricksNotebookAttributes 10 | { 11 | [JsonProperty("name")] 12 | public string Name = ""; 13 | [JsonProperty("qualifiedName")] 14 | public string QualifiedName = ""; 15 | [JsonProperty("clusterName")] 16 | public string ClusterName = ""; 17 | [JsonProperty("user")] 18 | public string User = ""; 19 | [JsonProperty("sparkVersion")] 20 | public string SparkVersion = ""; 21 | } 22 | } -------------------------------------------------------------------------------- /tests/environment/sources/sqlserver.bicep: -------------------------------------------------------------------------------- 1 | @description('The name of the SQL logical server.') 2 | param serverName string = uniqueString('sql', resourceGroup().id) 3 | 4 | @description('Location for all resources.') 5 | param location string = resourceGroup().location 6 | 7 | @description('The administrator username of the SQL logical server.') 8 | param administratorLogin string 9 | 10 | @description('The administrator password of the SQL logical server.') 11 | @secure() 12 | param administratorLoginPassword string 13 | 14 | resource sqlServer 'Microsoft.Sql/servers@2022-05-01-preview' = { 15 | name: serverName 16 | location: location 17 | properties: { 18 | administratorLogin: administratorLogin 19 | administratorLoginPassword: administratorLoginPassword 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksSparkJarTaskAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | public class DatabricksSparkJarTaskAttributes : DatabricksJobTaskAttributes, IDatabricksJobTaskAttributes 10 | { 11 | [JsonProperty("mainClassName")] 12 | public string MainClassName = ""; 13 | [JsonProperty("jarUri")] 14 | public string JarUri = ""; 15 | [JsonProperty("jar")] 16 | public string Jar = ""; 17 | [JsonProperty("parameters")] 18 | public List<string> Parameters = new List<string>(); 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/EnrichedEvent.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System; 5 | using System.Collections.Generic; 6 | using Function.Domain.Models.Adb; 7 | 8 | namespace Function.Domain.Models.OL 9 | { 10 | public class EnrichedEvent 11 | { 12 | public Event? OlEvent = null; 13 | public AdbRoot? AdbRoot = null; 14 | public AdbRoot? AdbParentRoot = null; 15 | public bool IsInteractiveNotebook = false; 16 | public EnrichedEvent(Event olEvent, AdbRoot? adbRoot, AdbRoot?
adbParentRoot) 17 | { 18 | OlEvent = olEvent; 19 | AdbRoot = adbRoot; 20 | AdbParentRoot = adbParentRoot; 21 | } 22 | } 23 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksPythonWheelTask.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksPythonWheelTask 8 | { 9 | [JsonProperty("typeName")] 10 | public string TypeName { get; set; } = "databricks_python_wheel_task"; 11 | [JsonProperty("attributes")] 12 | public DatabricksPythonWheelTaskAttributes Attributes { get; set; } = new DatabricksPythonWheelTaskAttributes(); 13 | [JsonProperty("relationshipAttributes")] 14 | public DatabricksTaskRelationshipAttributes RelationshipAttributes { get; set; } = new DatabricksTaskRelationshipAttributes(); 15 | } 16 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Settings/PurviewColumnMapping.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | 6 | namespace Function.Domain.Models.Settings 7 | { 8 | public class PurviewColumnMapping 9 | { 10 | public DatasetMapping DatasetMapping { get; set; } = new DatasetMapping(); 11 | public List<ColumnMapping> ColumnMapping { get; set; } = new List<ColumnMapping>(); 12 | } 13 | 14 | public class DatasetMapping 15 | { 16 | public string Source = ""; 17 | public string Sink = ""; 18 | } 19 | 20 | public class ColumnMapping 21 | { 22 | public string Source = ""; 23 | public string Sink = ""; 24 | } 25 | 26 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksNotebookTask.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | namespace Function.Domain.Models.Purview 6 | { 7 | public class DatabricksNotebookTask 8 | { 9 | [JsonProperty("typeName")] 10 | public string TypeName { get; set; } = "databricks_notebook_task"; 11 | [JsonProperty("attributes")] 12 | public DatabricksNotebookTaskAttributes Attributes { get; set; } = new DatabricksNotebookTaskAttributes(); 13 | [JsonProperty("relationshipAttributes")] 14 | public DatabricksNotebookTaskRelationshipAttributes RelationshipAttributes { get; set; } = new DatabricksNotebookTaskRelationshipAttributes(); 15 | } 16 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksPythonWheelTaskAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | public class DatabricksPythonWheelTaskAttributes : DatabricksJobTaskAttributes, IDatabricksJobTaskAttributes 10 | { 11 | [JsonProperty("packageName")] 12 | public string PackageName = ""; 13 | [JsonProperty("entryPoint")] 14 | public string EntryPoint = ""; 15 | [JsonProperty("wheel")] 16 | public string Wheel = ""; 17 | [JsonProperty("parameters")] 18 | public List<string> Parameters = new List<string>(); 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/OutputFacets.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System; 5 | using Newtonsoft.Json; 6 | using Newtonsoft.Json.Linq; 7 | 8 | namespace Function.Domain.Models.OL 9 | { 10 | [JsonObject("facets")] 11 | public class OutputFacets 12 | { 13 | [JsonProperty("lifeCycleStateChange")] 14 | public LifeCycleStateChangeClass LifeCycleStateChange = new LifeCycleStateChangeClass(); 15 | [JsonProperty("columnLineage")] 16 | public ColumnLineageFacetsClass ColFacets = new ColumnLineageFacetsClass(); 17 | } 18 | 19 | public class LifeCycleStateChangeClass 20 | { 21 | [JsonProperty("lifeCycleStateChange")] 22 | public string LifeCycleStateChange = ""; 23 | } 24 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksJobTaskAtrributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | public class DatabricksJobTaskAttributes 10 | { 11 | [JsonProperty("name")] 12 | public string Name { get; set; } = ""; 13 | [JsonProperty("qualifiedName")] 14 | public string QualifiedName { get; set; } = ""; 15 | [JsonProperty("jobId")] 16 | public long JobId { get; set; } = 0; 17 | [JsonProperty("clusterId")] 18 | public string ClusterId { get; set; } = ""; 19 | [JsonProperty("sparkVersion")] 20 | public string SparkVersion { get; set; } = ""; 21 | } 22 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/hive+mgd+not+default-in-hive+mgd+not+default-out-insert.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %sql 3 | # MAGIC CREATE DATABASE IF NOT EXISTS notdefault; 4 | 5 | # COMMAND ---------- 6 | 7 | # MAGIC %sql 8 | # MAGIC CREATE TABLE IF NOT EXISTS notdefault.hiveExampleA ( 9 | # MAGIC tableId INT, 10 | # MAGIC x INT 11 | # MAGIC ); 12 | 13 | # MAGIC CREATE TABLE IF NOT EXISTS notdefault.hiveExampleOutput( 14 | # MAGIC tableId INT, 15 | # MAGIC x INT 16 | # MAGIC ) 17 | 18 | # COMMAND ---------- 19 | 20 | # MAGIC %sql 21 | # MAGIC INSERT INTO notdefault.hiveExampleA (tableId, x) VALUES(1,2) 22 | 23 | # COMMAND ---------- 24 | 25 | # MAGIC %sql 26 | # MAGIC INSERT INTO notdefault.hiveExampleOutput (tableId, x) 27 | # MAGIC SELECT tableId, x 28 | # MAGIC FROM notdefault.hiveExampleA 29 | 30 | # COMMAND ---------- 31 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Settings/ParserCondition.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
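// One entry of a parserConditions array from the OlToPurviewMappings configuration, e.g.
// { "op1": "prefix", "compare": "=", "op2": "adl" } as in docs/mappings/adlsg1.json.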
3 | 4 | using Newtonsoft.Json; 5 | 6 | 7 | namespace Function.Domain.Models.Settings 8 | { 9 | public class ParserCondition 10 | { 11 | [JsonProperty("op1")] 12 | public string OlOpRaw = ""; // Dictionary uses this as a key to find type value {prefix, constr, suffix, path, etc.} 13 | [JsonProperty("compare")] 14 | public string Compare = ""; // Supports "=", "!=", ">", "<", "contains" 15 | [JsonProperty("op2")] 16 | public string ValOp2 = ""; // Supports string or int 17 | public ConfigValue Op1 { 18 | get 19 | { 20 | return OlToPurviewMapping.GetConfigValue(OlOpRaw); 21 | } 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /docs/mappings/adlsg1.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "adlsg1", 3 | "parserConditions": [ 4 | { 5 | "op1": "prefix", 6 | "compare": "=", 7 | "op2": "adl" 8 | }, 9 | { 10 | "op1": "nameSpcBodyParts", 11 | "compare": ">", 12 | "op2": "1" 13 | } 14 | ], 15 | "qualifiedName": "adl://{nameSpcBodyParts[0]}/{nameSpaceBodyJoinedBySlashFrom[1]}/{nameGroups[0]}", 16 | "purviewDataType": "azure_datalake_gen1_path", 17 | "purviewPrefix": "adl" 18 | }, 19 | { 20 | "name": "adlsg1", 21 | "parserConditions": [ 22 | { 23 | "op1": "prefix", 24 | "compare": "=", 25 | "op2": "adl" 26 | } 27 | ], 28 | "qualifiedName": "adl://{nameSpcBodyParts[0]}/{nameGroups[0]}", 29 | "purviewDataType": "azure_datalake_gen1_path", 30 | "purviewPrefix": "adl" 31 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksProcess.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | using System.Collections.Generic; 6 | namespace Function.Domain.Models.Purview 7 | { 8 | public class DatabricksProcess 9 | { 10 | [JsonProperty("typeName")] 11 | public string TypeName = "databricks_process"; 12 | [JsonProperty("attributes")] 13 | public DatabricksProcessAttributes Attributes = new DatabricksProcessAttributes(); 14 | [JsonProperty("relationshipAttributes")] 15 | public DatabricksProcessRelationshipAttributes RelationshipAttributes = new DatabricksProcessRelationshipAttributes(); 16 | [JsonProperty("columnAttributes")] 17 | public List<ColumnLevelAttributes> ColumnLevel = new List<ColumnLevelAttributes>(); 18 | } 19 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Helpers/HttpHelper.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using System.Threading.Tasks; 5 | using System.Net; 6 | using Microsoft.Azure.Functions.Worker.Http; 7 | 8 | namespace Function.Domain.Helpers 9 | { 10 | public class HttpHelper : IHttpHelper 11 | { 12 | public async Task<HttpResponseData> CreateSuccessfulHttpResponse(HttpRequestData req, object data) 13 | { 14 | var response = req.CreateResponse(HttpStatusCode.OK); 15 | await response.WriteAsJsonAsync(data); 16 | 17 | return response; 18 | } 19 | public HttpResponseData CreateServerErrorHttpResponse(HttpRequestData req) 20 | { 21 | var response = req.CreateResponse(HttpStatusCode.InternalServerError); 22 | 23 | return response; 24 | } 25 | } 26 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/DatabricksProcessAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | public class DatabricksProcessAttributes 10 | { 11 | [JsonProperty("name")] 12 | public string Name = ""; 13 | [JsonProperty("qualifiedName")] 14 | public string QualifiedName = ""; 15 | [JsonProperty("columnMapping")] 16 | public string ColumnMapping = ""; 17 | [JsonProperty("sparkPlan")] 18 | public string SparkPlan = ""; 19 | [JsonProperty("inputs")] 20 | public List? Inputs = new List(); 21 | [JsonProperty("outputs")] 22 | public List? Outputs = new List(); 23 | } 24 | 25 | } -------------------------------------------------------------------------------- /deployment/infra/settings.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Microsoft Corporation. 4 | # Licensed under the MIT License. 5 | 6 | # Fill values in here. 7 | 8 | rg="" 9 | prefix="" 10 | clientid="" 11 | clientsecret="" 12 | tenantid="" 13 | purviewlocation="" 14 | 15 | # For Resource Tags use the following format: '{"Name":"Value","Name2":"Value2"}' 16 | 17 | resourcetagtalues="" 18 | 19 | ############################################# 20 | # For purviewlocation please use one of the following region: 21 | # NorthEurope, WestEurope, UKSouth, FranceCentral, EastUS, EastUS2, CentralUS, NorthCentralUS, SouthCentralUS, WestCentralUS 22 | # WestUS, WestUS2, CanadaCentral, BrazilSouth, PacificEastAsia, SoutheastAsia, CentralIndia, JapanEast, KoreaCentral, AustraliaSoutheast 23 | # AustraliaEast, SouthAfricaNorth, UAENorth 24 | ############################################# 25 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/Project.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using System; 5 | using System.Collections.Generic; 6 | 7 | using System.Globalization; 8 | using Newtonsoft.Json; 9 | using Newtonsoft.Json.Converters; 10 | 11 | namespace Function.Domain.Models.OL 12 | { 13 | 14 | public class Project 15 | { 16 | [JsonProperty("class")] 17 | public string ApacheClass { get; set; } = ""; 18 | 19 | [JsonProperty("num-children")] 20 | public long NumChildren { get; set; } 21 | 22 | [JsonProperty("name")] 23 | public string Name { get; set; } = ""; 24 | 25 | [JsonProperty("dataType")] 26 | public string DataType { get; set; } = ""; 27 | 28 | [JsonProperty("qualifier")] 29 | public List<string> Qualifier { get; set; } = new List<string>(); 30 | } 31 | 32 | 33 | 34 | } 35 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/Plan.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.OL 8 | { 9 | public class Plan 10 | { 11 | [JsonProperty("class")] 12 | public string ApacheClass { get; set; } = ""; 13 | 14 | [JsonProperty("num-children")] 15 | public int NumChildren { get; set; } 16 | public string outputColumnNames { get; set; } = ""; 17 | 18 | [JsonProperty("projectList", NullValueHandling = NullValueHandling.Ignore)] 19 | public List<List<Project>> projectList { get; set; } = new List<List<Project>>(); 20 | 21 | [JsonProperty("aggregateExpressions", NullValueHandling = NullValueHandling.Ignore)] 22 | public List<List<Project>> aggregateExpressions { get; set; } = new List<List<Project>>(); 23 | } 24 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/Asset.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Newtonsoft.Json; 5 | using Newtonsoft.Json.Linq; 6 | using System.Collections.Generic; 7 | using System; 8 | 9 | 10 | namespace Function.Domain.Models.Purview 11 | { 12 | public class Asset 13 | { 14 | public string typeName = ""; 15 | public string lastModifiedTS = "1"; 16 | public string guid = ""; 17 | public string status = "ACTIVE"; 18 | public string createdBy = ""; 19 | public string updatedBy = ""; 20 | public Int64 createTime = 0; 21 | public Int64 updateTime = 0; 22 | public int version = 0; 23 | public Dictionary<string, object>? sourceDetails; 24 | public Dictionary<string, object>? relationshipAttributes; 25 | public Dictionary<string, object>? attributes; 26 | public string source = ""; 27 | } 28 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/tests/unit-tests/Function.Domain/Services/OlFilterTests.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using Xunit; 5 | using Microsoft.Extensions.Logging.Abstractions; 6 | using Function.Domain.Services; 7 | 8 | namespace UnitTests.Function.Domain.Services 9 | { 10 | public class OlFilterTests{ 11 | 12 | private NullLoggerFactory _mockLoggerFactory; 13 | 14 | public OlFilterTests() 15 | { 16 | _mockLoggerFactory = new NullLoggerFactory(); 17 | } 18 | 19 | [Theory] 20 | [ClassData(typeof(FilterOlEventTestData))] 21 | public void FilterOlEvent_bool_FilterGoodEvents(string msgEvent, bool expectedResult) 22 | { 23 | IOlFilter filterOlEvent = new OlFilter(_mockLoggerFactory); 24 | var rslt = filterOlEvent.FilterOlMessage(msgEvent); 25 | 26 | Xunit.Assert.Equal(expectedResult, rslt); 27 | } 28 | } 29 | } -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Azure Databricks to Purview Solution Accelerator Documentation 2 | 3 | ## Deployment Instructions 4 | 5 | * [Deploy the Demo](../deploy-demo.md) of the Solution Accelerator. Great for trying out the solution before applying it to your own environment. 6 | * [Deploy the Connector Only](../deploy-base.md) if you already have a Microsoft Purview and Databricks instance and are ready to work with your own notebooks and data sources. 7 | 8 | ## Advanced Use Cases 9 | 10 | * [Troubleshooting](../TROUBLESHOOTING.md) provides solutions to common errors while installing or running the Solution Accelerator 11 | * [Extending Source Support](./extending-source-support.md) to create custom mappings for your data sources. 12 | * [Mappings](./mappings/) provides sample mappings for use in the `OlToPurviewMappings` app setting to enable parsing additional data sources. 13 | * [Advanced Configuration](./configuration.md) 14 | 15 | ## Release Documentation 16 | 17 | * [Release Checklist](./release-checklist.md) 18 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/pythonscript/pythonscript.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "PythonScriptJob", 3 | "new_cluster": { 4 | "num_workers": 1, 5 | "spark_version": "9.1.x-scala2.12", 6 | "spark_conf": { 7 | "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-kv/Ol-Output-Api-Key}}", 8 | "spark.openlineage.host": "https://YOURFUNCTION.azurewebsites.net", 9 | "spark.openlineage.namespace": "adb-123.1#ABC123", 10 | "spark.openlineage.version": "1" 11 | }, 12 | "node_type_id": "Standard_DS3_v2", 13 | "init_scripts": { 14 | "dbfs": { 15 | "destination": "dbfs:/databricks/openlineagehardcoded/release-candidate.sh" 16 | } 17 | } 18 | }, 19 | "libraries": [], 20 | "timeout_seconds": 3600, 21 | "max_retries": 1, 22 | "spark_python_task": { 23 | "python_file": "dbfs:/FileStore/testcases/pythonscript.py", 24 | "parameters": [] 25 | } 26 | } -------------------------------------------------------------------------------- /tests/environment/sources/adx.bicep: -------------------------------------------------------------------------------- 1 | @description('Cluster Name for Azure Data Explorer') 2 | param clusterName string = uniqueString('adx', resourceGroup().id) 3 | 4 | @description('Database Name for Azure Data Explorer Cluster') 5 | param databaseName string = 'database01' 6 | 7 | @description('Location for all resources.') 8 | param location string = resourceGroup().location 9 | 10 | resource symbolicname 'Microsoft.Kusto/clusters@2022-11-11' = { 11 | 
name: clusterName 12 | location: location 13 | sku: { 14 | capacity: 1 15 | name: 'Dev(No SLA)_Standard_D11_v2' 16 | tier: 'Basic' 17 | } 18 | identity: { 19 | type: 'SystemAssigned' 20 | } 21 | properties: { 22 | enableAutoStop: true 23 | engineType: 'V3' 24 | publicIPType: 'IPv4' 25 | publicNetworkAccess: 'Enabled' 26 | } 27 | resource symbolicname 'databases@2022-11-11' = { 28 | name: databaseName 29 | location: location 30 | kind: 'ReadWrite' 31 | // For remaining properties, see clusters/databases objects 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /tests/integration/search.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | import json 5 | import os 6 | 7 | PURVIEW_NAME = os.environ.get("PURVIEW_INTEGRATION_TARGET", "purview-to-adb-purview") 8 | 9 | from pyapacheatlas.core.client import PurviewClient 10 | from azure.identity import AzureCliCredential 11 | 12 | 13 | cred = AzureCliCredential() 14 | client = PurviewClient( 15 | account_name=PURVIEW_NAME, 16 | authentication=cred 17 | ) 18 | 19 | 20 | resp = client.discovery.search_entities( 21 | "*", search_filter={"or": [ 22 | {"entityType": "spark_process"}, 23 | {"entityType": "spark_application"}, 24 | {"entityType": "databricks_job"}, 25 | {"entityType": "databricks_notebook"}, 26 | {"entityType": "databricks_notebook_task"}, 27 | {"entityType": "databricks_process"}, 28 | ]}) 29 | results = [e["qualifiedName"] for e in resp] 30 | 31 | print(json.dumps(results, indent=2)) 32 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/tests/unit-tests/unit-tests.csproj: -------------------------------------------------------------------------------- 1 | <Project Sdk="Microsoft.NET.Sdk"> 2 | 3 | <PropertyGroup> 4 | <TargetFramework>net6.0</TargetFramework> 5 | <RootNamespace>unit_tests</RootNamespace> 6 | <Nullable>enable</Nullable> 7 | 8 | <IsPackable>false</IsPackable> 9 | </PropertyGroup> 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | </Project> -------------------------------------------------------------------------------- /tests/integration/spark-apps/sparksubmit/sparksubmit.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "SparkSubmitForBasicApp", 3 | "new_cluster": { 4 | "num_workers": 1, 5 | "spark_version": "9.1.x-scala2.12", 6 | "spark_conf": { 7 | "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-kv/Ol-Output-Api-Key}}", 8 | "spark.openlineage.host": "https://YOURFUNCTION.azurewebsites.net", 9 | "spark.openlineage.namespace": "YOURNAMESPACE#JOBNAME", 10 | "spark.openlineage.version": "1" 11 | }, 12 | "node_type_id": "Standard_DS3_v2", 13 | "init_scripts": { 14 | "dbfs": { 15 | "destination": "dbfs:/databricks/openlineagehardcoded/release-candidate.sh" 16 | } 17 | } 18 | }, 19 | "libraries": [], 20 | "timeout_seconds": 3600, 21 | "max_retries": 1, 22 | "spark_submit_task": { 23 | "parameters": [ 24 | "--class", 25 | "SparkApp.ReadWrite.App", 26 | "dbfs:/FileStore/testcases/rwapp.jar" 27 | ] 28 | } 29 | } -------------------------------------------------------------------------------- /docs/powershell-alternatives.md: -------------------------------------------------------------------------------- 1 | # Powershell Alternative Scripts 2 | 3 | In some cases, you're not able to use the cloud shell or you don't have access to a machine that can run wsl / curl.
This doc provides alternatives to select commands. 4 | 5 | ## Upload Custom Types 6 | 7 | Assumes you are in the `deployment/infra` folder of the repo. 8 | 9 | ```powershell 10 | $purview_endpoint="https://PURVIEW_ACCOUNT_NAME.purview.azure.com" 11 | $TENANT_ID="TENANT_ID" 12 | $CLIENT_ID="CLIENT_ID" 13 | $CLIENT_SECRET="CLIENT_SECRET" 14 | 15 | $get_token=(Invoke-RestMethod -Method 'Post' -Uri "https://login.microsoftonline.com/$TENANT_ID/oauth2/token" -Body "resource=https://purview.azure.net&client_id=$CLIENT_ID&client_secret=$CLIENT_SECRET&grant_type=client_credentials") 16 | $token=$get_token.access_token 17 | $body=(Get-Content -Path .\Custom_Types.json) 18 | $headers = @{ 19 | 'Content-Type'='application/json' 20 | 'Authorization'= "Bearer $token" 21 | } 22 | 23 | Invoke-RestMethod -Method 'Post' -Uri "$purview_endpoint/catalog/api/atlas/v2/types/typedefs" -Body $body -Headers $headers 24 | 25 | ``` 26 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/hive+mnt-in-hive+mnt-out-insert.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # spark.sparkContext.setLogLevel("DEBUG") 3 | 4 | # COMMAND ---------- 5 | 6 | # MAGIC %sql 7 | # MAGIC CREATE TABLE IF NOT EXISTS default.hiveExampleMnt001 ( 8 | # MAGIC tableId INT, 9 | # MAGIC x INT 10 | # MAGIC ) 11 | # MAGIC LOCATION '/mnt/rawdata/testcase/twentyone/exampleInputA/' 12 | # MAGIC ; 13 | # MAGIC 14 | # MAGIC CREATE TABLE IF NOT EXISTS default.hiveExampleMntOutput001( 15 | # MAGIC tableId INT, 16 | # MAGIC x INT 17 | # MAGIC ) 18 | # MAGIC LOCATION '/mnt/rawdata/testcase/twentyone/exampleOutput/' 19 | # MAGIC ; 20 | 21 | # COMMAND ---------- 22 | 23 | # %sql 24 | # INSERT INTO default.hiveExampleMnt001 (tableId, x) VALUES(1,2) 25 | 26 | # COMMAND ---------- 27 | 28 | # MAGIC %sql 29 | # MAGIC INSERT INTO default.hiveExampleMntOutput001 (tableId, x) 30 | # MAGIC SELECT tableId, x 31 | # MAGIC FROM default.hiveExampleMnt001 32 | 33 | # COMMAND ---------- 34 | 35 | # spark.read.table("default.hiveExampleOutput001").inputFiles() 36 | 37 | # COMMAND ---------- 38 | 39 | 40 | -------------------------------------------------------------------------------- /tests/environment/sources/adlsg2.bicep: -------------------------------------------------------------------------------- 1 | @description('Location of the data factory.') 2 | param location string = resourceGroup().location 3 | 4 | @description('Name of the Azure storage account that contains the input/output data.') 5 | param storageAccountName string = 'storage${uniqueString(resourceGroup().id)}' 6 | 7 | resource storageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' = { 8 | name: storageAccountName 9 | location: location 10 | sku: { 11 | name: 'Standard_LRS' 12 | } 13 | kind: 'StorageV2' 14 | properties:{ 15 | isHnsEnabled: true 16 | } 17 | 18 | } 19 | 20 | resource rawdataContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = { 21 | name: '${storageAccount.name}/default/rawdata' 22 | } 23 | 24 | resource writeToRootContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = { 25 | name: '${storageAccount.name}/default/writetoroot' 26 | } 27 | 28 | resource outputdataContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = { 29 | name: '${storageAccount.name}/default/outputdata' 30 | } 31 | --------------------------------------------------------------------------------
/function-app/adb-to-purview/src/Function.Domain/Helpers/IPurviewClientHelper.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System; 5 | using System.Collections.Generic; 6 | using System.Net.Http; 7 | using System.Text; 8 | using System.Threading.Tasks; 9 | using Function.Domain.Models; 10 | using Function.Domain.Models.Purview; 11 | 12 | namespace Function.Domain.Helpers 13 | { 14 | public interface IPurviewClientHelper 15 | { 16 | public Task PostEntitiesToPurview(string correlationId, string token, dynamic batchUpdatePayload, string bulkUpdateEndpoint); 17 | public Task GetEntitiesFromPurview(string correlationId, string qualifiedName, string purviewSearchEndpoint, string token); 18 | public Task DeleteEntityByGuidInPurview(string correlationId, string token, string entityGuid, string purviewDeleteEndPoint); 19 | public Task<List<Asset>> GetEntityFromPurview(string correlationId, string qualifiedName, string purviewSearchEndpoint, string token, string typeName); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Purview/ColumnLevelAttributes.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.Purview 8 | { 9 | //Column level Attributes Model for Purview out 10 | public class ColumnLevelAttributes 11 | { 12 | [JsonProperty("DatasetMapping")] 13 | public DatasetMappingClass datasetMapping = new DatasetMappingClass(); 14 | [JsonProperty("ColumnMapping")] 15 | public List<ColumnMappingClass> columnMapping = new List<ColumnMappingClass>(); 16 | 17 | } 18 | 19 | public class DatasetMappingClass 20 | { 21 | [JsonProperty("Source")] 22 | public string source = ""; 23 | [JsonProperty("Sink")] 24 | public string sink = ""; 25 | } 26 | 27 | public class ColumnMappingClass 28 | { 29 | [JsonProperty("Source")] 30 | public string source = ""; 31 | [JsonProperty("Sink")] 32 | public string sink = ""; 33 | } 34 | 35 | 36 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Helpers/parser/IDatabricksToPurviewParser.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | 4 | using Function.Domain.Models.Purview; 5 | using Function.Domain.Models.Adb; 6 | 7 | namespace Function.Domain.Helpers 8 | { 9 | public interface IDatabricksToPurviewParser 10 | { 11 | public DatabricksWorkspace GetDatabricksWorkspace(); 12 | public DatabricksJob GetDatabricksJob(string workspaceQn); 13 | public DatabricksNotebook GetDatabricksNotebook(string workspaceQn, bool isInteractive); 14 | public DatabricksNotebookTask GetDatabricksNotebookTask(string notebookQn, string workspaceQn); 15 | public DatabricksPythonTask GetDatabricksPythonTask(string jobQn); 16 | public DatabricksPythonWheelTask GetDatabricksPythonWheelTask(string jobQn); 17 | public DatabricksSparkJarTask GetDatabricksSparkJarTask(string jobQn); 18 | public DatabricksProcess GetDatabricksProcess(string taskQn); 19 | public JobType GetJobType(); 20 | public IColParser GetColumnParser(); 21 | } 22 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/readSampleWriteSample/app/src/main/java/SparkApp/ReadWrite/App.java: -------------------------------------------------------------------------------- 1 | package SparkApp.ReadWrite; 2 | 3 | import org.apache.spark.sql.Dataset; 4 | import org.apache.spark.sql.Row; 5 | import org.apache.spark.sql.SparkSession; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | public class App { 10 | private static final Logger log = LoggerFactory.getLogger("MyLogger"); 11 | 12 | public String getGreeting() { 13 | return "Hello World!"; 14 | } 15 | 16 | public static void main(String[] args) { 17 | 18 | SparkSession spark = SparkSession 19 | .builder() 20 | .appName("readWriteSample") 21 | .getOrCreate(); 22 | System.out.println(new App().getGreeting()); 23 | 24 | 25 | Dataset<Row> df = spark.read().format("csv") 26 | .option("header", true) 27 | .option("inferSchema", true) 28 | .load("/mnt/rawdata/testcase/nineteen/exampleInputA"); 29 | 30 | df.repartition(1).write().mode("overwrite").format("csv").save("/mnt/rawdata/testcase/nineteen/output"); 31 | 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /deployment/util/README.md: -------------------------------------------------------------------------------- 1 | # Utilities for Deployment 2 | 3 | ## mappings-remove-spaces 4 | 5 | Used in the Github Action for creating a deployment artifact that is easier to copy / paste or upload into an app setting for Azure Functions, as sketched below.
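Conceptually, the script reloads the mappings JSON and re-serializes it without whitespace. A minimal sketch of that core step (the real script differs, e.g. it also takes an `output_path` argument):

```python
import argparse
import json

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("mappings_json", help="File path of the mappings json")
    args = parser.parse_args()

    with open(args.mappings_json, "r") as fp:
        mappings = json.load(fp)

    # Compact form suitable for the OlToPurviewMappings app setting; this mirrors
    # the json.dumps(mappings).replace(" ", "") step used by mappings-update-arm.py.
    print(json.dumps(mappings).replace(" ", ""))
```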
6 | 7 | ``` 8 | usage: mappings-remove-spaces.py [-h] mappings_json output_path 9 | 10 | positional arguments: 11 | mappings_json File path of the mappings json 12 | ``` 13 | 14 | Sample: 15 | ``` 16 | python ./deployment/util/mappings-remove-spaces.py ./deployment/infra/OlToPurviewMappings.json > test.json 17 | ``` 18 | 19 | ## mappings-update-arm 20 | 21 | Used to update the ARM template in a standardized way 22 | 23 | ``` 24 | usage: mappings-update-arm.py [-h] mappings_json template_file output_path 25 | 26 | positional arguments: 27 | mappings_json File path of the mappings json 28 | template_file File path to the ARM template to be updated 29 | output_path File path to the output 30 | ``` 31 | 32 | Sample: 33 | ``` 34 | python ./deployment/util/mappings-update-arm.py ./deployment/infra/OlToPurviewMappings.json ./deployment/infra/newdeploymenttemp.json ./deployment/infra/newdeploymenttemp.json 35 | ``` 36 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Settings/NewParserSettings.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
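// In-memory model of the parser settings: each SettingsItem pairs a list of match Conditions
// with a PurviewIdentifier transformation used to build the qualified name. Compare the
// serialized rule samples under docs/mappings/ (e.g. adlsg1.json).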
3 | 4 | using System.Collections.Generic; 5 | using Function.Domain.Models.Purview; 6 | 7 | namespace Function.Domain.Models.Settings 8 | { 9 | public class NewParserSettings 10 | { 11 | public List<SettingsItem> OlToPurviewMappings = new List<SettingsItem>(); 12 | } 13 | 14 | public class SettingsItem 15 | { 16 | public List<Condition> Conditions = new List<Condition>(); 17 | public PurviewIdentifier Transformation = new PurviewIdentifier(); // used to build identifier 18 | } 19 | 20 | public class Condition 21 | { 22 | public string Op1 = ""; // Dictionary uses this as a key to find type value {prefix, constr, suffix, path, etc.} 23 | public string Op2 = ""; 24 | public string Conditional = ""; // Supports "=" and "!=" 25 | } 26 | 27 | public class PurviewIdentifierMapping 28 | { 29 | public string PurviewType = ""; 30 | public string Prefix = ""; 31 | public string Connection = ""; 32 | public string Path = ""; 33 | } 34 | 35 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Helpers/parser/Exceptions.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System; 5 | 6 | namespace Function.Domain.Helpers 7 | { 8 | /// 9 | /// Exception thrown when the current run of the function cannot continue due to missing or invalid configuration. 10 | /// 11 | public class ConfigMismatchException : Exception 12 | { 13 | public ConfigMismatchException(string message) 14 | : base(message) { } 15 | 16 | public ConfigMismatchException() { } 17 | 18 | public ConfigMismatchException(string message, Exception inner) 19 | : base(message, inner) { } 20 | 21 | } 22 | /// 23 | /// Exception thrown when the current run of the function cannot continue due to mismatched or missing data values.
24 | /// 25 | public class MissingCriticalDataException : Exception 26 | { 27 | public MissingCriticalDataException(string message) 28 | : base(message) { } 29 | 30 | public MissingCriticalDataException() { } 31 | 32 | public MissingCriticalDataException(string message, Exception inner) 33 | : base(message, inner) { } 34 | } 35 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | 5 | { 6 | "name": "Attach to .NET Functions", 7 | "type": "coreclr", 8 | "request": "attach", 9 | "processId": "${command:azureFunctions.pickProcess}" 10 | }, 11 | { 12 | "name": ".NET Core Launch (console)", 13 | "type": "coreclr", 14 | "request": "launch", 15 | "preLaunchTask": "build", 16 | "program": "${workspaceFolder}/tests/customAsset_test/bin/Debug/net6.0/customAsset_test.exe", 17 | "args": [], 18 | "cwd": "${workspaceFolder}", 19 | "stopAtEntry": false, 20 | "console": "internalConsole" 21 | }, 22 | { 23 | "name": "QualifiedNameConfigTester (console)", 24 | "type": "coreclr", 25 | "request": "launch", 26 | "program": "${workspaceFolder}/tests/tools/QualifiedNameConfigTester/bin/Debug/net6.0/QualifiedNameConfigTester.exe", 27 | "args": [], 28 | "cwd": "${workspaceFolder}/tests/tools/QualifiedNameConfigTester", 29 | "stopAtEntry": false, 30 | "console": "integratedTerminal" 31 | } 32 | ] 33 | } -------------------------------------------------------------------------------- /docs/mappings/README.md: -------------------------------------------------------------------------------- 1 | # Gallery of Mappings 2 | 3 | This directory contains a "gallery" of sample OpenLineage to Purview Mappings that can be used in the `OlToPurviewMappings` App Setting on the solution's Azure Function. 4 | 5 | ## Azure Data Lake Gen 1 6 | 7 | * [ADLS Gen 1 Path](./adlsg1.json) 8 | * Supports mapping the `adl://` path to an ADLS Gen 1 Path in Purview. 9 | * OpenLineage returns a DataSet with `{"namespace":"adl://adblineagetesting.azuredatalakestore.net", "name":"/folder/path"}`. 10 | * Microsoft Purview expects a fully qualified name of `adl://adblineagetesting.azuredatalakestore.net/folder/path` for the `azure_datalake_gen1_path`. 11 | 12 | ## Azure SQL 13 | 14 | * [Prioritize Azure SQL Non DBO](./az-sql.json) 15 | * Default mappings treat an Azure SQL table named `myschema.mytable` as schema of `myschema` and table of `mytable`. 16 | * If you remove the `azureSQLNonDboNoDotsInNames` mapping, the above example would default to `dbo.[myschema.mytable]`. 17 | 18 | ## Snowflake 19 | 20 | * [Snowflake](./snowflake.json) 21 | * Supports mapping Snowflake tables in Purview. 22 | * OpenLineage returns a DataSet with `"namespace":"snowflake://<account>","name":"<database>.<schema>.<table>"` 23 | * Microsoft Purview expects a fully qualified name of `snowflake://<account>/databases/<database>/schemas/<schema>/tables/<table>
-------------------------------------------------------------------------------- /tests/environment/dbfs/mounts.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | import os 3 | 4 | storage_acct_name = os.environ.get("STORAGE_SERVICE_NAME") 5 | configs = {"fs.azure.account.auth.type": "OAuth", 6 | "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider", 7 | "fs.azure.account.oauth2.client.id": dbutils.secrets.get("purview-to-adb-kv", 'clientIdKey'), 8 | "fs.azure.account.oauth2.client.secret": dbutils.secrets.get("purview-to-adb-kv", 'clientSecretKey'), 9 | "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{dbutils.secrets.get('purview-to-adb-kv', 'tenant-id')}/oauth2/token"} 10 | 11 | # COMMAND ---------- 12 | 13 | # Optionally, you can append a directory to the source URI of your mount point. 14 | try: 15 | dbutils.fs.mount( 16 | source = f"abfss://rawdata@{storage_acct_name}.dfs.core.windows.net/", 17 | mount_point = "/mnt/rawdata", 18 | extra_configs = configs) 19 | except Exception as e: 20 | print(e) 21 | 22 | # COMMAND ---------- 23 | 24 | try: 25 | dbutils.fs.mount( 26 | source = f"abfss://outputdata@{storage_acct_name}.dfs.core.windows.net/", 27 | mount_point = "/mnt/outputdata", 28 | extra_configs = configs) 29 | except Exception as e: 30 | print(e) 31 | 32 | # COMMAND ---------- 33 | 34 | 35 | -------------------------------------------------------------------------------- /PRIVACY.md: -------------------------------------------------------------------------------- 1 | # Privacy 2 | 3 | When you deploy this template, Microsoft is able to identify the installation of the software with the Azure resources that are deployed. Microsoft is able to correlate the Azure resources that are used to support the software. Microsoft collects this information to provide the best experiences with their products and to operate their business. The data is collected and governed by Microsoft's privacy policies, which can be found at [Microsoft Privacy Statement](https://go.microsoft.com/fwlink/?LinkID=824704). 4 | 5 | To disable this, simply remove the following section from all ARM templates before deploying the resources to Azure: 6 | 7 | ```json 8 | { 9 | "apiVersion": "2018-02-01", 10 | "name": "pid-1e23d6fb-478f-4b04-bfa3-70db11929652", 11 | "type": "Microsoft.Resources/deployments", 12 | "properties": { 13 | "mode": "Incremental", 14 | "template": { 15 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", 16 | "contentVersion": "1.0.0.0", 17 | "resources": [] 18 | } 19 | } 20 | } 21 | ``` 22 | 23 | Information on opting out for specific templates is included in the deployment documentation for that part of the solution.
24 | You can see more information on this at https://docs.microsoft.com/en-us/azure/marketplace/azure-partner-customer-usage-attribution 25 | -------------------------------------------------------------------------------- /deployment/util/mappings-update-arm.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | if __name__ == "__main__": 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("mappings_json", help="File path of the mappings json") 7 | parser.add_argument("template_file", help="File path to the ARM template to be updated") 8 | parser.add_argument("output_path", help="File path to the output") 9 | args, unknown_args = parser.parse_known_args() 10 | 11 | with open(args.mappings_json, 'r') as fp: 12 | mappings = json.load(fp) 13 | 14 | with open(args.template_file, 'r') as arm_input: 15 | arm = json.load(arm_input) 16 | 17 | for resource in arm["resources"]: 18 | if resource["type"] != "Microsoft.Web/sites": 19 | continue 20 | 21 | child_resources = resource["resources"] 22 | for child_resource in child_resources: 23 | if child_resource["type"] != "config": 24 | continue 25 | 26 | for setting in child_resource["properties"]["appSettings"]: 27 | if setting["name"] != "OlToPurviewMappings": 28 | continue 29 | setting["value"] = json.dumps(mappings).replace(" ", "") 30 | print("Successfully updated mappings setting") 31 | 32 | 33 | with open(args.output_path, 'w') as output: 34 | json.dump(arm, output, indent="\t") 35 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Constants/Constants.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | using System.Diagnostics.CodeAnalysis; 5 | 6 | namespace Function.Domain.Constants 7 | { 8 | public struct AuthenticationConstants 9 | { 10 | public const string Audience = "Authentication:Audience"; 11 | public const string Authority = "Authentication:Authority"; 12 | public const string Domain = "Authentication:Domain"; 13 | public const string Bearer = "Bearer"; 14 | 15 | } 16 | public struct PurviewAPIConstants 17 | { 18 | //Purview API Constants 19 | // Setting to 1000 (the max) will force the search scores to be 1 and thus suboptimal search results 20 | // Reducing from 1000 to 100 will enable better search results 21 | public const string DefaultSearchLimit = "100"; 22 | public const string DefaultOffset = "100"; 23 | } 24 | 25 | public struct HttpConstants 26 | { 27 | public const int ClientTimeoutHour = 0; 28 | public const int ClientTimeoutMinutes = 10; 29 | public const int ClientTimeoutSeconds = 0; 30 | public const string ContentType = "Content-Type"; 31 | public const string ContentTypeJson = "application/json"; 32 | } 33 | 34 | public struct ParserConstants 35 | { 36 | public const string DBFS = "dbfs"; 37 | public const string DBFS2 = "file"; 38 | } 39 | 40 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/abfss-in-hive+saveAsTable-out.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | // Seq( 3 | // ("someId001", "Foo", 2, true), 4 | // ("someId002", "Bar", 2, false) 5 | // ).toDF("id","name","age","isAlive") 6 | // .write.format("delta") 7 | // .save(abfssRootPath+"/testcase/abfss-in-hive+saveAsTable-out/exampleInputA/") 8 | 9 | // COMMAND ---------- 10 | 11 | //spark.sparkContext.setLogLevel("DEBUG") 12 | 13 | // COMMAND ---------- 14 | 15 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 16 | 17 | // COMMAND ---------- 18 | 19 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 20 | val storageContainerName = "rawdata" 21 | val ouptutContainerName = "outputdata" 22 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 23 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 24 | 25 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 26 | 27 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 28 | 29 | // COMMAND ---------- 30 | 31 | val exampleA = ( 32 | spark.read.format("delta") 33 | .load(abfssRootPath+"/testcase/abfss-in-hive+saveAsTable-out/exampleInputA/") 34 | ) 35 | 36 | // COMMAND ---------- 37 | 38 | exampleA.write.mode("overwrite").saveAsTable("abfssInHiveSaveAsTableOut") 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. e.g. data sources and destination being used 16 | 2. e.g. code snippet to generate the error 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **Logs** 22 | 1. 
Please include any Spark code being run that generates this error 23 | 2. Please [include a gist](https://docs.github.com/en/get-started/writing-on-github/editing-and-sharing-content-with-gists/creating-gists) of the OpenLineageIn and PurviewOut logs 24 | 3. See how to stream [Azure Function Logs](https://docs.microsoft.com/en-us/azure/azure-functions/streaming-logs) 25 | 26 | **Screenshots** 27 | If applicable, add screenshots to help explain your problem. 28 | 29 | **Desktop (please complete the following information):** 30 | - OS: [e.g. Windows, Mac] 31 | - OpenLineage Version: [e.g. name of jar] 32 | - Databricks Runtime Version: [e.g. 9.1, 10.1, 11.3] 33 | - Cluster Type: [e.g. Job, Interactive] 34 | - Cluster Mode: [e.g. Standard, High Concurrency, Single] 35 | - Using Credential Passthrough: [e.g. Yes, No] 36 | 37 | 38 | **Additional context** 39 | Add any other context about the problem here. 40 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/ColumnLineageFacets.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System; 5 | using Newtonsoft.Json; 6 | using Newtonsoft.Json.Linq; 7 | using System.Collections.Generic; 8 | 9 | namespace Function.Domain.Models.OL 10 | { 11 | // Model for getting column data from the OpenLineage input 12 | [JsonObject("facets")] 13 | public class ColumnLineageFacets 14 | { 15 | [JsonProperty("columnLineage")] 16 | public ColumnLineageFacetsClass LifeCycleStateChange = new ColumnLineageFacetsClass(); 17 | } 18 | 19 | public class ColumnLineageFacetsClass 20 | { 21 | [JsonProperty("fields")] 22 | public Dictionary<string, ColumnLineageInputFieldClass> fields = new Dictionary<string, ColumnLineageInputFieldClass>(); 23 | } 24 | [JsonObject("inputFields")] 25 | public class ColumnLineageInputFieldClass 26 | { 27 | 28 | public List<ColumnLineageIdentifierClass> inputFields = new List<ColumnLineageIdentifierClass>(); 29 | } 30 | 31 | public class ColumnLineageIdentifierClass 32 | { 33 | [JsonProperty("namespace")] 34 | public string nameSpace { get; set; } = ""; 35 | [JsonProperty("name")] 36 | public string name { get; set; } = ""; 37 | [JsonProperty("field")] 38 | public string field { get; set; } = ""; 39 | } 40 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/Adb/AdbRoot.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
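// AdbRoot (below) models a single Databricks job run; the JsonProperty names mirror
// the snake_case fields of the Databricks Jobs API run payload (e.g. jobs/runs/get).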
3 | 4 | using Newtonsoft.Json; 5 | 6 | namespace Function.Domain.Models.Adb 7 | { 8 | public class AdbRoot 9 | { 10 | [JsonProperty("job_id")] 11 | public long JobId = 0; 12 | [JsonProperty("run_id")] 13 | public long RunId = 0; 14 | [JsonProperty("start_time")] 15 | public long StartTime = 0; 16 | [JsonProperty("setup_duration")] 17 | public long SetupDuration = 0; 18 | [JsonProperty("execution_duration")] 19 | public long ExecutionDuration = 0; 20 | [JsonProperty("cleanup_duration")] 21 | public long CleanupDuration = 0; 22 | [JsonProperty("end_time")] 23 | public long EndTime = 0; 24 | [JsonProperty("trigger")] 25 | public string Trigger = ""; 26 | [JsonProperty("creator_user_name")] 27 | public string CreatorUserName = ""; 28 | [JsonProperty("run_name")] 29 | public string RunName = ""; 30 | [JsonProperty("run_page_url")] 31 | public string RunPageUrl = ""; 32 | [JsonProperty("run_type")] 33 | public string RunType = ""; 34 | [JsonProperty("parent_run_id")] 35 | public long ParentRunId = 0; 36 | [JsonProperty("tasks")] 37 | public JobTask[]? JobTasks = null; 38 | 39 | } 40 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/postgres-in-postgres-out.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | host = dbutils.secrets.get("purview-to-adb-kv", "postgres-host") 3 | port = "5432" 4 | dbname = "postgres" 5 | user = dbutils.secrets.get("purview-to-adb-kv", "postgres-admin-user") 6 | password = dbutils.secrets.get("purview-to-adb-kv", "postgres-admin-password") 7 | table_in = "people" # hardcoded based on populate-data-postgres. 8 | table_out = "fruits" 9 | sslmode = "require" 10 | 11 | # COMMAND ---------- 12 | 13 | df = spark.read \ 14 | .format("jdbc") \ 15 | .option("url", f"jdbc:postgresql://{host}:{port}/{dbname}") \ 16 | .option("dbtable", table_in) \ 17 | .option("user", user) \ 18 | .option("password", password) \ 19 | .option("driver", "org.postgresql.Driver") \ 20 | .option("ssl", False) \ 21 | .load() 22 | 23 | # COMMAND ---------- 24 | 25 | df.show() 26 | 27 | # COMMAND ---------- 28 | 29 | df=df.withColumn("age", df.age-100) 30 | 31 | # COMMAND ---------- 32 | 33 | df.show() 34 | 35 | # COMMAND ---------- 36 | 37 | df.write \ 38 | .format("jdbc") \ 39 | .option("url", f"jdbc:postgresql://{host}:{port}/{dbname}") \ 40 | .option("dbtable", table_out) \ 41 | .option("user", user) \ 42 | .option("password", password) \ 43 | .option("driver", "org.postgresql.Driver") \ 44 | .mode("overwrite") \ 45 | .option("ssl", False) \ 46 | .save() 47 | 48 | # COMMAND ---------- 49 | 50 | 51 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/name-with-periods.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 3 | 4 | // COMMAND ---------- 5 | 6 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 7 | val storageContainerName = "rawdata" 8 | val ouptutContainerName = "outputdata" 9 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 10 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 11 | 12 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 13 | 14 | 
spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 15 | 16 | // COMMAND ---------- 17 | 18 | val exampleASchema = StructType( 19 | StructField("id", IntegerType, true) :: 20 | StructField("postalCode", StringType, false) :: 21 | StructField("streetAddress", StringType, false) :: Nil) 22 | 23 | val exampleA = ( 24 | spark.read.format("csv") 25 | .schema(exampleASchema) 26 | .option("header", true) 27 | .load(abfssRootPath+"/testcase/namewithperiods/exampleInputA/exampleInputA.csv") 28 | ) 29 | 30 | // COMMAND ---------- 31 | 32 | val outputDf = exampleA.drop(exampleA("id")) 33 | 34 | outputDf.repartition(1).write.mode("overwrite").format("parquet").save(outputRootPath+"/testcase/namewithperiods/badname.parquet") 35 | 36 | // COMMAND ---------- 37 | 38 | 39 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Models/Parser/OpenLineage/EnvironmentProps.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System.Collections.Generic; 5 | using Newtonsoft.Json; 6 | 7 | namespace Function.Domain.Models.OL 8 | { 9 | public class EnvironmentProps 10 | { 11 | [JsonProperty("spark.databricks.clusterUsageTags.clusterName")] 12 | public string SparkDatabricksClusterUsageTagsClusterName = ""; 13 | [JsonProperty("spark.databricks.job.runId")] 14 | public string SparkDatabricksJobRunId = ""; 15 | [JsonProperty("spark.databricks.job.type")] 16 | public string SparkDatabricksJobType = ""; 17 | [JsonProperty("spark.databricks.clusterUsageTags.azureSubscriptionId")] 18 | public string SparkDatabricksClusterUsageTagsAzureSubscriptionId = ""; 19 | [JsonProperty("spark.databricks.notebook.path")] 20 | public string SparkDatabricksNotebookPath = ""; 21 | [JsonProperty("spark.databricks.clusterUsageTags.clusterOwnerOrgId")] 22 | public string SparkDatabricksClusterUsageTagsClusterOwnerOrgId = ""; 23 | 24 | //Other MountPoint Structure 25 | //public Dictionary MountPoints = new Dictionary(); 26 | public List MountPoints = new List(); 27 | public string User = ""; 28 | public string UserId = ""; 29 | public string OrgId = ""; 30 | } 31 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/mysql-in-mysql-out.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %scala 3 | # MAGIC Class.forName("com.mysql.cj.jdbc.Driver") 4 | 5 | # COMMAND ---------- 6 | 7 | host = dbutils.secrets.get("purview-to-adb-kv", "mysql-hostname") 8 | user = dbutils.secrets.get("purview-to-adb-kv", "mysql-user") 9 | password = dbutils.secrets.get("purview-to-adb-kv", "mysql-password") 10 | database = "mydatabase" # hardcoded based on populate-data-mysql notebook. 11 | table = "people" # hardcoded based on populate-data-mysql notebook. 
12 | port = "3306" # update if you use a non-default port 13 | driver = "com.mysql.cj.jdbc.Driver" 14 | 15 | # COMMAND ---------- 16 | 17 | url = f"jdbc:mysql://{host}:{port}/{database}" 18 | 19 | df = (spark.read 20 | .format("jdbc") 21 | .option("driver", driver) 22 | .option("url", url) 23 | .option("dbtable", table) 24 | .option("user", user) 25 | .option("ssl", False) 26 | .option("password", password) 27 | .load() 28 | ) 29 | 30 | # COMMAND ---------- 31 | 32 | df.show() 33 | 34 | # COMMAND ---------- 35 | 36 | df=df.withColumn("age", df.age-100) 37 | 38 | # COMMAND ---------- 39 | 40 | df.show() 41 | 42 | # COMMAND ---------- 43 | 44 | df.write \ 45 | .format("jdbc") \ 46 | .option("driver", driver) \ 47 | .option("url", url) \ 48 | .option("dbtable", "fruits") \ 49 | .option("user", user) \ 50 | .option("ssl", False) \ 51 | .mode("overwrite") \ 52 | .option("password", password) \ 53 | .save() 54 | 55 | # COMMAND ---------- 56 | 57 | 58 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/hive+abfss-in-hive+abfss-out-insert.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | import os 3 | 4 | storageServiceName = os.environ.get("STORAGE_SERVICE_NAME") 5 | storageContainerName = "rawdata" 6 | ouptutContainerName = "outputdata" 7 | abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 8 | outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 9 | 10 | storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 11 | 12 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 13 | spark.conf.set('spark.query.rootPath',abfssRootPath) 14 | spark.conf.set('query.outputPath',outputRootPath) 15 | 16 | # COMMAND ---------- 17 | 18 | spark.sql(f""" 19 | CREATE TABLE IF NOT EXISTS default.testSample ( 20 | tableId INT, 21 | x INT 22 | ) 23 | LOCATION 'abfss://rawdata@{storageServiceName}.dfs.core.windows.net/testcase/twentyone/exampleInputA/' 24 | ; 25 | """ 26 | ) 27 | 28 | # COMMAND ---------- 29 | 30 | spark.sql(f""" 31 | CREATE TABLE IF NOT EXISTS default.hiveExampleOutput001 ( 32 | tableId INT, 33 | x INT 34 | ) 35 | LOCATION 'abfss://rawdata@{storageServiceName}.dfs.core.windows.net/testcase/twentyone/exampleOutput/' 36 | ; 37 | """ 38 | ) 39 | 40 | # COMMAND ---------- 41 | 42 | # MAGIC %sql 43 | # MAGIC INSERT INTO default.hiveExampleOutput001 (tableId, x) 44 | # MAGIC SELECT tableId, x 45 | # MAGIC FROM default.hiveExampleA001 46 | 47 | # COMMAND ---------- 48 | 49 | 50 | -------------------------------------------------------------------------------- /assets/estimator/README.md: -------------------------------------------------------------------------------- 1 | # Estimating Cost of the Azure Databricks to Purview Lineage Connector 2 | 3 | The Excel spreadsheet in this directory provides a loose cost estimate that can be adjusted to your unique workload behaviors. Before running this solution, you should verify pricing is accurate based on your own organization's and primary region's official pricing sheets. 4 | 5 | ## Key Metrics Requiring Input 6 | 7 | The below metrics require customization and research based on your workload's behavior and design. 8 | 9 | * **Notebooks Executed per Hour**: For a given hour, how many notebooks / jobs do you expect to run? 
10 | * **Notebooks Running Concurrently**: For that hour, how many notebooks would be running at the same time? 11 | * **Average Spark Job per Notebook**: On average, how many spark jobs occur per notebook? 12 | * This can be observed in the Spark Notebook user interface or aggregated in the Spark UI. 13 | ![Number of Spark Jobs in the Spark Notebooks User Interface](../img/estimator/spark-jobs-in-notebook.png) 14 | * **Average Inputs per Notebook**: How many input sources are you connecting to on average? 15 | * **Average Outputs per Notebook**: How many output sources are you connecting to on average? 16 | * **Average Message Size Per Event (Bytes)**: How large are the messages from OpenLineage to Purview? 17 | * More complex Spark logical plans (e.g. large amounts of transformations) will result in larger messages. 18 | * You can estimate this by recording a sample of OpenLineage messages in the Databricks Driver Logs and calculating the number of bytes used. 19 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Program.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Microsoft.Extensions.Hosting; 5 | using Microsoft.Extensions.Logging; 6 | using Function.Domain.Helpers; 7 | using Function.Domain.Services; 8 | using Microsoft.Extensions.DependencyInjection; 9 | using Function.Domain.Middleware; 10 | 11 | namespace TestFunc 12 | { 13 | public class Program 14 | { 15 | public static void Main() 16 | { 17 | var host = new HostBuilder() 18 | .ConfigureLogging((context, builder) => 19 | { 20 | var key = context.Configuration["APPINSIGHTS_INSTRUMENTATIONKEY"]; 21 | builder.AddApplicationInsights(key); 22 | }) 23 | .ConfigureFunctionsWorkerDefaults(workerApplication => 24 | { 25 | workerApplication.UseMiddleware(); 26 | }) 27 | .ConfigureServices(s => 28 | { 29 | s.AddScoped(); 30 | s.AddScoped(); 31 | s.AddScoped(); 32 | s.AddScoped(); 33 | s.AddScoped(); 34 | }) 35 | .Build(); 36 | host.Run(); 37 | } 38 | } 39 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/readSampleWriteSample/app/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * This generated file contains a sample Java application project to get you started. 5 | * For more details take a look at the 'Building Java & JVM projects' chapter in the Gradle 6 | * User Manual available at https://docs.gradle.org/6.9.1/userguide/building_java_projects.html 7 | */ 8 | 9 | plugins { 10 | // Apply the java-library plugin for API and implementation separation. 11 | id 'application' 12 | id 'java' 13 | } 14 | 15 | ext { 16 | sparkVersion = '3.1.2' 17 | scalaVersion = '2.12' 18 | } 19 | 20 | repositories { 21 | // Use Maven Central for resolving dependencies. 22 | mavenCentral() 23 | } 24 | 25 | java { 26 | toolchain { 27 | languageVersion = JavaLanguageVersion.of(8) 28 | } 29 | 30 | } 31 | archivesBaseName="rwapp" 32 | 33 | dependencies { 34 | // Use JUnit test framework. 
35 | testImplementation 'junit:junit:4.13' 36 | 37 | implementation "org.apache.spark:spark-core_$scalaVersion:$sparkVersion" 38 | implementation "org.apache.spark:spark-sql_$scalaVersion:$sparkVersion" 39 | 40 | // https://mvnrepository.com/artifact/org.slf4j/slf4j-api 41 | implementation group: 'org.slf4j', name: 'slf4j-api', version: '1.7.32' 42 | // https://mvnrepository.com/artifact/org.slf4j/slf4j-simple 43 | testImplementation group: 'org.slf4j', name: 'slf4j-simple', version: '1.7.32' 44 | } 45 | 46 | application { 47 | // Define the main class for the application. 48 | mainClass = 'SparkApp.ReadWrite.App' 49 | } 50 | -------------------------------------------------------------------------------- /tests/integration/jobdefs-inactive/spark2-tests-expectations.json: -------------------------------------------------------------------------------- 1 | [ 2 | "databricks://.azuredatabricks.net/jobs/", 3 | "databricks://.azuredatabricks.net/jobs//tasks/spark2-abfss-in-abfss-out", 4 | "databricks://.azuredatabricks.net/jobs//tasks/spark2-abfss-in-abfss-out/processes/58C1F24BA6C6FF7592F786C9FA8A3451->BA6B11F82FDCE37E849D25D545E6FB7A", 5 | "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-abfss-out", 6 | "databricks://.azuredatabricks.net/jobs//tasks/spark2-abfss-oauth", 7 | "databricks://.azuredatabricks.net/jobs//tasks/spark2-abfss-oauth/processes/BD4A7A895E605BF6C4DE003D3F6B3F39->A3B52DA733083E4642E1C3DB6B093E84", 8 | "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-abfss-out-oauth", 9 | "databricks://.azuredatabricks.net/jobs//tasks/spark2-mnt", 10 | "databricks://.azuredatabricks.net/jobs//tasks/spark2-mnt/processes/336D6FD3010382DAB8351BFF026B2CBE->C60C4BAB82567905C64B99E2DCBCA711", 11 | "databricks://.azuredatabricks.net/notebooks/Shared/examples/mnt-in-mnt-out", 12 | "databricks://.azuredatabricks.net/jobs//tasks/spark2-Synapse-wasbs-synapse", 13 | "databricks://.azuredatabricks.net/jobs//tasks/spark2-Synapse-wasbs-synapse/processes/B596CF432EE21C0349CD0770BC839867->F1AD7C08349CD0A30B47392F787D6364", 14 | "databricks://.azuredatabricks.net/notebooks/Shared/examples/synapse-wasbs-in-synapse-out" 15 | ] -------------------------------------------------------------------------------- /tests/integration/jobdefs/pythonscript-test-def.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "PythonScriptJob", 3 | "email_notifications": {}, 4 | "max_concurrent_runs": 1, 5 | "tasks": [ 6 | { 7 | "task_key": "PythonScriptJob", 8 | "spark_python_task": { 9 | "python_file": "dbfs:/FileStore/testcases/pythonscript.py" 10 | }, 11 | "new_cluster": { 12 | "spark_version": "9.1.x-scala2.12", 13 | "spark_conf": { 14 | "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-kv/Ol-Output-Api-Key}}", 15 | "spark.openlineage.host": "https://.azurewebsites.net", 16 | "spark.openlineage.namespace": "#ABC123", 17 | "spark.openlineage.version": "v1" 18 | }, 19 | "node_type_id": "Standard_DS3_v2", 20 | "enable_elastic_disk": true, 21 | "init_scripts": [ 22 | { 23 | "dbfs": { 24 | "destination": "dbfs:/databricks/openlineage/open-lineage-init-script.sh" 25 | } 26 | } 27 | ], 28 | "azure_attributes": { 29 | "availability": "ON_DEMAND_AZURE" 30 | }, 31 | "num_workers": 1 32 | }, 33 | "max_retries": 1, 34 | "min_retry_interval_millis": 0, 35 | "retry_on_timeout": false, 36 | "timeout_seconds": 3600 37 | } 38 | ], 39 | "format": "MULTI_TASK" 40 | } 
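The job definitions above configure the OpenLineage listener through `spark_conf` entries, and `create-job.py` (the next file) substitutes the cluster placeholders and submits each `*-def.json` to the Jobs API. A minimal, hypothetical pre-flight check — not part of this repo's harness — that verifies a definition carries the required `spark.openlineage.*` settings might look like this:

```python
import json

# Spark confs the OpenLineage listener needs, as used in the *-def.json files.
REQUIRED_OL_CONFS = {
    "spark.openlineage.host",
    "spark.openlineage.namespace",
    "spark.openlineage.version",
    "spark.openlineage.url.param.code",
}

def missing_ol_confs(job_def: dict) -> list:
    """Return the required OpenLineage confs absent from any task's new_cluster."""
    missing = []
    for task in job_def.get("tasks", []):
        conf = task.get("new_cluster", {}).get("spark_conf", {})
        missing.extend(sorted(REQUIRED_OL_CONFS - conf.keys()))
    return missing

# Example (path assumes running from the repo root):
with open("tests/integration/jobdefs/pythonscript-test-def.json") as fp:
    print(missing_ol_confs(json.load(fp)))  # [] when the definition is complete
```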
-------------------------------------------------------------------------------- /tests/environment/dbfs/create-job.py: -------------------------------------------------------------------------------- 1 | # https://learn.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/workspace#--import 2 | import argparse 3 | import configparser 4 | import json 5 | import os 6 | 7 | import requests 8 | 9 | 10 | if __name__ == "__main__": 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--folder", default="./tests/integration/jobdefs") 13 | parser.add_argument("--ini", default="./tests/environment/config.ini") 14 | args = parser.parse_args() 15 | 16 | cfp = configparser.ConfigParser() 17 | 18 | cfp.read(args.ini) 19 | db_host_id = cfp["DEFAULT"]["databricks_workspace_host_id"] 20 | db_pat = cfp["DEFAULT"]["databricks_personal_access_token"] 21 | 22 | JOB_URL = f"https://{db_host_id}.azuredatabricks.net/api/2.1/jobs/create" 23 | for job_def in os.listdir(args.folder): 24 | if not job_def.endswith("-def.json"): 25 | continue 26 | 27 | print(job_def) 28 | with open(os.path.join(args.folder, job_def), 'r') as fp: 29 | job_json = json.load(fp) 30 | 31 | job_str = json.dumps(job_json) 32 | if job_def.startswith("spark2"): 33 | job_str = job_str.replace("", cfp["DEFAULT"]["databricks_spark2_cluster"]) 34 | else: 35 | job_str = job_str.replace("", cfp["DEFAULT"]["databricks_spark3_cluster"]) 36 | 37 | job_json_to_submit = json.loads(job_str) 38 | 39 | resp = requests.post( 40 | url=JOB_URL, 41 | json=job_json_to_submit, 42 | headers={ 43 | "Authorization": f"Bearer {db_pat}" 44 | } 45 | ) 46 | print(resp.content) 47 | 48 | 49 | -------------------------------------------------------------------------------- /docs/mappings/az-sql.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "azureSQLNonDbo", 4 | "parserConditions": [ 5 | { 6 | "op1": "prefix", 7 | "compare": "=", 8 | "op2": "sqlserver" 9 | }, 10 | { 11 | "op1": "nameGroups", 12 | "compare": ">", 13 | "op2": "1" 14 | } 15 | ], 16 | "qualifiedName": "mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/{nameGroups[0]}/{nameGroups[1]}", 17 | "purviewDataType": "azure_sql_table", 18 | "purviewPrefix": "mssql" 19 | }, 20 | { 21 | "name": "azureSQLNonDboNoDotsInNames", 22 | "parserConditions": [ 23 | { 24 | "op1": "prefix", 25 | "compare": "=", 26 | "op2": "sqlserver" 27 | }, 28 | { 29 | "op1": "nameGroups[0].parts", 30 | "compare": ">", 31 | "op2": "1" 32 | } 33 | ], 34 | "qualifiedName": "mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/{nameGroups[0].parts[0]}/{nameGroups[0].parts[1]}", 35 | "purviewDataType": "azure_sql_table", 36 | "purviewPrefix": "mssql" 37 | }, 38 | { 39 | "name": "azureSQL", 40 | "parserConditions": [ 41 | { 42 | "op1": "prefix", 43 | "compare": "=", 44 | "op2": "sqlserver" 45 | } 46 | ], 47 | "qualifiedName": "mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/dbo/{nameGroups[0]}", 48 | "purviewDataType": "azure_sql_table", 49 | "purviewPrefix": "mssql" 50 | } 51 | ] -------------------------------------------------------------------------------- /tests/deployment/test_arm_mapping_matches_json.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 
3 | 4 | import json 5 | 6 | if __name__ == "__main__": 7 | """ 8 | Confirm that the OlToPurviewMappings.json matches the arm template app settings 9 | """ 10 | ARM_AND_OlToPurviewMappings_MATCHES = False 11 | 12 | with open('./deployment/infra/OlToPurviewMappings.json', 'r') as mapping: 13 | mapping_json = json.load(mapping) 14 | 15 | with open('./deployment/infra/newdeploymenttemp.json', 'r') as arm: 16 | arm_body = json.load(arm) 17 | 18 | for resource in arm_body.get("resources", []): 19 | if resource["type"] != "Microsoft.Web/sites": 20 | continue 21 | 22 | if resource["name"] != "[variables('functionAppName')]": 23 | continue 24 | 25 | web_config = {} 26 | for child_resource in resource.get("resources", []): 27 | if child_resource.get("type") == "config" and child_resource.get("name") == "web": 28 | web_config = child_resource 29 | break 30 | 31 | app_settings = web_config.get("properties", {}).get("appSettings", []) 32 | for setting in app_settings: 33 | if setting["name"] != "OlToPurviewMappings": 34 | continue 35 | 36 | arm_mappings_value = json.loads(setting["value"]) 37 | 38 | if arm_mappings_value == mapping_json: 39 | ARM_AND_OlToPurviewMappings_MATCHES = True 40 | break 41 | 42 | if ARM_AND_OlToPurviewMappings_MATCHES: 43 | break 44 | 45 | print(ARM_AND_OlToPurviewMappings_MATCHES) 46 | if not ARM_AND_OlToPurviewMappings_MATCHES: 47 | exit(1) 48 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/abfssInAbfssOut/app/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * This generated file contains a sample Java application project to get you started. 5 | * For more details take a look at the 'Building Java & JVM projects' chapter in the Gradle 6 | * User Manual available at https://docs.gradle.org/6.9.1/userguide/building_java_projects.html 7 | */ 8 | 9 | plugins { 10 | // Apply the java-library plugin for API and implementation separation. 11 | id 'application' 12 | id 'java' 13 | } 14 | 15 | ext { 16 | sparkVersion = '3.2.0' 17 | scalaVersion = '2.12' 18 | } 19 | 20 | repositories { 21 | // Use Maven Central for resolving dependencies. 22 | mavenCentral() 23 | } 24 | 25 | java { 26 | toolchain { 27 | languageVersion = JavaLanguageVersion.of(8) 28 | } 29 | } 30 | 31 | 32 | dependencies { 33 | // Use JUnit test framework. 34 | testImplementation 'junit:junit:4.13' 35 | 36 | implementation "org.apache.spark:spark-core_$scalaVersion:$sparkVersion" 37 | implementation "org.apache.spark:spark-sql_$scalaVersion:$sparkVersion" 38 | 39 | // https://mvnrepository.com/artifact/com.databricks/dbutils-api 40 | implementation group: 'com.databricks', name: 'dbutils-api_2.12', version: '0.0.5' 41 | 42 | // https://mvnrepository.com/artifact/org.slf4j/slf4j-api 43 | implementation group: 'org.slf4j', name: 'slf4j-api', version: '1.7.32' 44 | // https://mvnrepository.com/artifact/org.slf4j/slf4j-simple 45 | testImplementation group: 'org.slf4j', name: 'slf4j-simple', version: '1.7.32' 46 | } 47 | 48 | application { 49 | // Define the main class for the application.
50 | mainClass = 'SparkApp.Basic.App' 51 | } 52 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/wasbs-in-wasbs-out.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 3 | 4 | // COMMAND ---------- 5 | 6 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 7 | val storageContainerName = "rawdata" 8 | val wasbsRootPath = "wasbs://"+storageContainerName+"@"+storageServiceName+".blob.core.windows.net" 9 | 10 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 11 | 12 | spark.conf.set("fs.azure.account.key."+storageServiceName+".blob.core.windows.net", storageKey) 13 | 14 | // COMMAND ---------- 15 | 16 | val exampleASchema = StructType( 17 | StructField("id", IntegerType, true) :: 18 | StructField("postalCode", StringType, false) :: 19 | StructField("streetAddress", StringType, false) :: Nil) 20 | 21 | val exampleA = ( 22 | spark.read.format("csv") 23 | .schema(exampleASchema) 24 | .option("header", true) 25 | .load(wasbsRootPath+"/testcase/wasinwasout/exampleInputA/") 26 | ) 27 | 28 | 29 | val exampleBSchema = StructType( 30 | StructField("id", IntegerType, true) :: 31 | StructField("city", StringType, false) :: 32 | StructField("stateAbbreviation", StringType, false) :: Nil) 33 | 34 | val exampleB = ( 35 | spark.read.format("csv") 36 | .schema(exampleBSchema) 37 | .option("header", true) 38 | .load(wasbsRootPath+"/testcase/wasinwasout/exampleInputB/") 39 | ) 40 | 41 | // COMMAND ---------- 42 | 43 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 44 | 45 | outputDf.repartition(1).write.mode("overwrite").format("csv").save(wasbsRootPath+"/testcase/wasinwasout/exampleOutputWASBS/") 46 | 47 | // COMMAND ---------- 48 | 49 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/mnt-in-mnt-out.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 3 | 4 | // COMMAND ---------- 5 | 6 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 7 | val storageContainerName = "rawdata" 8 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 9 | 10 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 11 | 12 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 13 | 14 | // COMMAND ---------- 15 | 16 | val exampleASchema = StructType( 17 | StructField("id", IntegerType, true) :: 18 | StructField("postalCode", StringType, false) :: 19 | StructField("streetAddress", StringType, false) :: Nil) 20 | 21 | val exampleA = ( 22 | spark.read.format("csv") 23 | .schema(exampleASchema) 24 | .option("header", true) 25 | .load("/mnt/rawdata/testcase/seven/exampleInputA/exampleInputA.csv") 26 | ) 27 | 28 | 29 | val exampleBSchema = StructType( 30 | StructField("id", IntegerType, true) :: 31 | StructField("city", StringType, false) :: 32 | StructField("stateAbbreviation", StringType, false) :: Nil) 33 | 34 | val exampleB = ( 35 | spark.read.format("csv") 36 | .schema(exampleBSchema) 37 | .option("header", true) 38 | 
.load("/mnt/rawdata/testcase/seven/exampleInputB/exampleInputB.csv") 39 | ) 40 | 41 | // COMMAND ---------- 42 | 43 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 44 | 45 | outputDf.repartition(1).write.mode("overwrite").format("csv").save("/mnt/outputdata/testcase/seven/mnt-in-mnt-out-folder/") 46 | 47 | // COMMAND ---------- 48 | 49 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/nested-child.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 3 | 4 | // COMMAND ---------- 5 | 6 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 7 | val storageContainerName = "rawdata" 8 | val ouptutContainerName = "outputdata" 9 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 10 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 11 | 12 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 13 | 14 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 15 | 16 | // COMMAND ---------- 17 | 18 | val exampleASchema = StructType( 19 | StructField("id", IntegerType, true) :: 20 | StructField("postalCode", StringType, false) :: 21 | StructField("streetAddress", StringType, false) :: Nil) 22 | 23 | val exampleA = ( 24 | spark.read.format("csv") 25 | .schema(exampleASchema) 26 | .option("header", true) 27 | .load(abfssRootPath+"/testcase/eight/exampleInputA/exampleInputA.csv") 28 | ) 29 | 30 | 31 | val exampleBSchema = StructType( 32 | StructField("id", IntegerType, true) :: 33 | StructField("city", StringType, false) :: 34 | StructField("stateAbbreviation", StringType, false) :: Nil) 35 | 36 | val exampleB = ( 37 | spark.read.format("csv") 38 | .schema(exampleBSchema) 39 | .option("header", true) 40 | .load(abfssRootPath+"/testcase/eight/exampleInputB/exampleInputB.csv") 41 | ) 42 | 43 | // COMMAND ---------- 44 | 45 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 46 | 47 | // COMMAND ---------- 48 | 49 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/delta-in-delta-out-abfss.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 3 | val storageContainerName = "rawdata" 4 | val ouptutContainerName = "outputdata" 5 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 6 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 7 | 8 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 9 | 10 | spark.conf.set("fs.azure.account.auth.type."+storageServiceName+".dfs.core.windows.net", "OAuth") 11 | spark.conf.set("fs.azure.account.oauth.provider.type."+storageServiceName+".dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") 12 | spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) 13 | 
spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) 14 | spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-kv", "tenant-id")+"/oauth2/token") 15 | 16 | // COMMAND ---------- 17 | 18 | val exampleA = ( 19 | spark.read.format("delta") 20 | .load(abfssRootPath+"/testcase/four/exampleInputA") 21 | ) 22 | 23 | val exampleB = ( 24 | spark.read.format("delta") 25 | .load(abfssRootPath+"/testcase/four/exampleInputB") 26 | ) 27 | 28 | // COMMAND ---------- 29 | 30 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 31 | 32 | //DOESN'T WORK 33 | outputDf.write.format("delta").mode("append").save(outputRootPath+"/testcase/four/exampleOutput/") -------------------------------------------------------------------------------- /deployment/infra/abfss-in-abfss-out-olsample.scala: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | // Databricks notebook source 5 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 6 | 7 | // COMMAND ---------- 8 | 9 | val storageServiceName = spark.conf.get("spark.openlineage.samplestorageaccount") 10 | val storageContainerName = spark.conf.get("spark.openlineage.samplestoragecontainer") 11 | val adlsRootPath = "wasbs://"+storageContainerName+"@"+storageServiceName+".blob.core.windows.net" 12 | 13 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storageAccessKey") 14 | 15 | spark.conf.set("fs.azure.account.key."+storageServiceName+".blob.core.windows.net", storageKey) 16 | 17 | 18 | // COMMAND ---------- 19 | 20 | val exampleASchema = StructType( 21 | StructField("id", IntegerType, true) :: 22 | StructField("postalCode", StringType, false) :: 23 | StructField("streetAddress", StringType, false) :: Nil) 24 | 25 | val exampleA = ( 26 | spark.read.format("csv") 27 | .schema(exampleASchema) 28 | .option("header", true) 29 | .load(adlsRootPath+"/examples/data/csv/exampleInputA/exampleInputA.csv") 30 | ) 31 | 32 | 33 | val exampleBSchema = StructType( 34 | StructField("id", IntegerType, true) :: 35 | StructField("city", StringType, false) :: 36 | StructField("stateAbbreviation", StringType, false) :: Nil) 37 | 38 | val exampleB = ( 39 | spark.read.format("csv") 40 | .schema(exampleBSchema) 41 | .option("header", true) 42 | .load(adlsRootPath+"/examples/data/csv/exampleInputB/exampleInputB.csv") 43 | ) 44 | 45 | // COMMAND ---------- 46 | 47 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 48 | 49 | outputDf.repartition(1).write.mode("overwrite").format("csv").save(adlsRootPath+"/examples/data/csv/exampleOutput/") 50 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/abfss-in-abfss-out-root.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 3 | 4 | // COMMAND ---------- 5 | 6 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 7 | val storageContainerName = "rawdata" 8 | val ouptutContainerName = "writetoroot" 9 | val abfssRootPath = 
"abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 10 | val outputAbfssRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net/root" 11 | 12 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 13 | 14 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 15 | 16 | // COMMAND ---------- 17 | 18 | val exampleASchema = StructType( 19 | StructField("id", IntegerType, true) :: 20 | StructField("postalCode", StringType, false) :: 21 | StructField("streetAddress", StringType, false) :: Nil) 22 | 23 | val exampleA = ( 24 | spark.read.format("csv") 25 | .schema(exampleASchema) 26 | .option("header", true) 27 | .load(abfssRootPath+"/testcase/three/exampleInputA/exampleInputA.csv") 28 | ) 29 | 30 | 31 | val exampleBSchema = StructType( 32 | StructField("id", IntegerType, true) :: 33 | StructField("city", StringType, false) :: 34 | StructField("stateAbbreviation", StringType, false) :: Nil) 35 | 36 | val exampleB = ( 37 | spark.read.format("csv") 38 | .schema(exampleBSchema) 39 | .option("header", true) 40 | .load(abfssRootPath+"/testcase/three/exampleInputB/exampleInputB.csv") 41 | ) 42 | 43 | // COMMAND ---------- 44 | 45 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 46 | 47 | outputDf.repartition(1).write.mode("overwrite").format("csv").save(outputAbfssRootPath) 48 | 49 | // COMMAND ---------- 50 | 51 | -------------------------------------------------------------------------------- /deployment/infra/Custom_Types.json: -------------------------------------------------------------------------------- 1 | { 2 | "entityDefs": [ 3 | { 4 | "category": "ENTITY", 5 | "name": "purview_custom_connector_generic_column", 6 | "description": "purview_custom_connector_generic_column", 7 | "serviceType": "Purview Custom Connector", 8 | "options": { 9 | "schemaAttributes": "data_type" 10 | }, 11 | "attributeDefs": [ 12 | { 13 | "name": "data_type", 14 | "typeName": "string", 15 | "isOptional": false, 16 | "cardinality": "SINGLE", 17 | "valuesMinCount": 1, 18 | "valuesMaxCount": 1, 19 | "isUnique": false, 20 | "isIndexable": false, 21 | "includeInNotification": false 22 | } 23 | ], 24 | "superTypes": [ 25 | "DataSet" 26 | ], 27 | "subTypes": [], 28 | "relationshipAttributeDefs": [] 29 | }, 30 | { 31 | "category": "ENTITY", 32 | "name": "purview_custom_connector_generic_entity_with_columns", 33 | "description": "purview_custom_connector_generic_entity_with_columns", 34 | "serviceType": "Purview Custom Connector", 35 | "options": { 36 | "schemaElementsAttribute": "columns" 37 | }, 38 | "attributeDefs": [ 39 | { 40 | "name": "purview_qualifiedName", 41 | "typeName": "string", 42 | "isOptional": true, 43 | "cardinality": "SINGLE", 44 | "valuesMinCount": 0, 45 | "valuesMaxCount": 1, 46 | "isUnique": false, 47 | "isIndexable": false, 48 | "includeInNotification": false 49 | }, 50 | { 51 | "name": "original_source", 52 | "typeName": "string", 53 | "isOptional": true, 54 | "cardinality": "SINGLE", 55 | "valuesMinCount": 0, 56 | "valuesMaxCount": 1, 57 | "isUnique": false, 58 | "isIndexable": false, 59 | "includeInNotification": false 60 | } 61 | ], 62 | "superTypes": [ 63 | "DataSet" 64 | ], 65 | "subTypes": [], 66 | "relationshipAttributeDefs": [] 67 | } 68 | ] 69 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/abfss-in-abfss-out.scala: 
-------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 3 | 4 | // COMMAND ---------- 5 | 6 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 7 | val storageContainerName = "rawdata" 8 | val ouptutContainerName = "outputdata" 9 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 10 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 11 | 12 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 13 | 14 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 15 | 16 | // COMMAND ---------- 17 | 18 | val exampleASchema = StructType( 19 | StructField("id", IntegerType, true) :: 20 | StructField("postalCode", StringType, false) :: 21 | StructField("streetAddress", StringType, false) :: Nil) 22 | 23 | val exampleA = ( 24 | spark.read.format("csv") 25 | .schema(exampleASchema) 26 | .option("header", true) 27 | .load(abfssRootPath+"/testcase/one/exampleInputA/exampleInputA.csv") 28 | ) 29 | 30 | 31 | val exampleBSchema = StructType( 32 | StructField("id", IntegerType, true) :: 33 | StructField("city", StringType, false) :: 34 | StructField("stateAbbreviation", StringType, false) :: Nil) 35 | 36 | val exampleB = ( 37 | spark.read.format("csv") 38 | .schema(exampleBSchema) 39 | .option("header", true) 40 | .load(abfssRootPath+"/testcase/one/exampleInputB/exampleInputB.csv") 41 | ) 42 | 43 | // COMMAND ---------- 44 | 45 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 46 | 47 | outputDf.repartition(1).write.mode("overwrite").format("csv").save(outputRootPath+"/testcase/one/abfss-in-abfss-out-folder/") 48 | 49 | // COMMAND ---------- 50 | 51 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/call-via-adf-spark2.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 3 | 4 | // COMMAND ---------- 5 | 6 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 7 | val storageContainerName = "rawdata" 8 | val ouptutContainerName = "outputdata" 9 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 10 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 11 | 12 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 13 | 14 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 15 | 16 | // COMMAND ---------- 17 | 18 | val exampleASchema = StructType( 19 | StructField("id", IntegerType, true) :: 20 | StructField("postalCode", StringType, false) :: 21 | StructField("streetAddress", StringType, false) :: Nil) 22 | 23 | val exampleA = ( 24 | spark.read.format("csv") 25 | .schema(exampleASchema) 26 | .option("header", true) 27 | .load(abfssRootPath+"/testcase/thirteen/exampleInputA/exampleInputA.csv") 28 | ) 29 | 30 | 31 | val exampleBSchema = StructType( 32 | StructField("id", IntegerType, true) :: 33 | StructField("city", StringType, false) :: 34 | StructField("stateAbbreviation", StringType, false) :: Nil) 35 | 36 | val exampleB = ( 37 | 
spark.read.format("csv") 38 | .schema(exampleBSchema) 39 | .option("header", true) 40 | .load(abfssRootPath+"/testcase/thirteen/exampleInputB/exampleInputB.csv") 41 | ) 42 | 43 | // COMMAND ---------- 44 | 45 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 46 | 47 | outputDf.repartition(1).write.mode("overwrite").format("csv").save(outputRootPath+"/testcase/thirteen/call-via-adf-spark2-folder/") 48 | 49 | // COMMAND ---------- 50 | 51 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/call-via-adf-spark3.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 3 | 4 | // COMMAND ---------- 5 | 6 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 7 | val storageContainerName = "rawdata" 8 | val ouptutContainerName = "outputdata" 9 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 10 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 11 | 12 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 13 | 14 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 15 | 16 | // COMMAND ---------- 17 | 18 | val exampleASchema = StructType( 19 | StructField("id", IntegerType, true) :: 20 | StructField("postalCode", StringType, false) :: 21 | StructField("streetAddress", StringType, false) :: Nil) 22 | 23 | val exampleA = ( 24 | spark.read.format("csv") 25 | .schema(exampleASchema) 26 | .option("header", true) 27 | .load(abfssRootPath+"/testcase/fourteen/exampleInputA/exampleInputA.csv") 28 | ) 29 | 30 | 31 | val exampleBSchema = StructType( 32 | StructField("id", IntegerType, true) :: 33 | StructField("city", StringType, false) :: 34 | StructField("stateAbbreviation", StringType, false) :: Nil) 35 | 36 | val exampleB = ( 37 | spark.read.format("csv") 38 | .schema(exampleBSchema) 39 | .option("header", true) 40 | .load(abfssRootPath+"/testcase/fourteen/exampleInputB/exampleInputB.csv") 41 | ) 42 | 43 | // COMMAND ---------- 44 | 45 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 46 | 47 | outputDf.repartition(1).write.mode("overwrite").format("csv").save(outputRootPath+"/testcase/fourteen/call-via-adf-spark2-folder/") 48 | 49 | // COMMAND ---------- 50 | 51 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/jarjobs/abfssInAbfssOut/app/src/main/java/SparkApp/Basic/App.java: -------------------------------------------------------------------------------- 1 | package SparkApp.Basic; 2 | 3 | import org.apache.spark.sql.Dataset; 4 | import org.apache.spark.sql.Row; 5 | import org.apache.spark.sql.SparkSession; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | import com.databricks.dbutils_v1.DBUtilsHolder; 9 | import com.databricks.dbutils_v1.DBUtilsV1; 10 | 11 | 12 | 13 | public class App { 14 | private static final Logger log = LoggerFactory.getLogger("MyLogger"); 15 | 16 | public String getGreeting() { 17 | return "Hello World!"; 18 | } 19 | 20 | public static void main(String[] args) { 21 | String storageServiceName = System.getenv("STORAGE_SERVICE_NAME"); 22 | String storageContainerName = "rawdata"; 23 | String 
ouptutContainerName = "outputdata"; 24 | String abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net"; 25 | String outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net"; 26 | 27 | SparkSession spark = SparkSession 28 | .builder() 29 | .appName("JavaSparkPi") 30 | .getOrCreate(); 31 | DBUtilsV1 dbutils = DBUtilsHolder.dbutils(); 32 | System.out.println(new App().getGreeting()); 33 | 34 | 35 | String storageKey = dbutils.secrets().get("purview-to-adb-kv", "storage-service-key"); 36 | 37 | spark.conf().set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey); 38 | 39 | Dataset<Row> df = spark.read().format("csv") 40 | .option("header", true) 41 | .option("inferSchema", true) 42 | .load(abfssRootPath+"/testcase/eighteen/exampleInputA/exampleInputA.csv"); 43 | 44 | df.repartition(1).write().mode("overwrite").format("csv").save(outputRootPath+"/testcase/eighteen/abfss-in-abfss-out-java-jar-app/"); 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/delta-in-delta-out-mnt.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 3 | val storageContainerName = "rawdata" 4 | val ouptutContainerName = "outputdata" 5 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 6 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 7 | 8 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 9 | 10 | //spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 11 | spark.conf.set("fs.azure.account.auth.type."+storageServiceName+".dfs.core.windows.net", "OAuth") 12 | spark.conf.set("fs.azure.account.oauth.provider.type."+storageServiceName+".dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") 13 | spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) 14 | spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) 15 | spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-kv", "tenant-id")+"/oauth2/token") 16 | 17 | // COMMAND ---------- 18 | 19 | val exampleA = ( 20 | spark.read.format("delta") 21 | .load(abfssRootPath+"/testcase/six/exampleInputA") 22 | ) 23 | 24 | val exampleB = ( 25 | spark.read.format("delta") 26 | .load(abfssRootPath+"/testcase/six/exampleInputB") 27 | ) 28 | 29 | // COMMAND ---------- 30 | 31 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 32 | 33 | //WORKS 34 | outputDf.write.format("delta").mode("append").save("/mnt/outputdata/testcase/six/delta-in-delta-out-mnt-folder") -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/delta-in-delta-out-fs.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 3 | val
storageContainerName = "rawdata" 4 | val ouptutContainerName = "outputdata" 5 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 6 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 7 | 8 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 9 | 10 | //spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 11 | spark.conf.set("fs.azure.account.auth.type."+storageServiceName+".dfs.core.windows.net", "OAuth") 12 | spark.conf.set("fs.azure.account.oauth.provider.type."+storageServiceName+".dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") 13 | spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) 14 | spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) 15 | spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-kv", "tenant-id")+"/oauth2/token") 16 | 17 | // COMMAND ---------- 18 | 19 | val exampleA = ( 20 | spark.read.format("delta") 21 | .load(abfssRootPath+"/testcase/five/exampleInputA") 22 | ) 23 | 24 | val exampleB = ( 25 | spark.read.format("delta") 26 | .load(abfssRootPath+"/testcase/five/exampleInputB") 27 | ) 28 | 29 | // COMMAND ---------- 30 | 31 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 32 | 33 | //WORKS 34 | outputDf.write.format("delta").mode("append").save(outputRootPath + "/testcase/five/delta-in-delta-out-fs-folder") -------------------------------------------------------------------------------- /function-app/adb-to-purview/.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "label": "clean (functions)", 6 | "command": "dotnet", 7 | "options": { 8 | "cwd": "src/" 9 | }, 10 | "args": [ 11 | "clean", 12 | "/property:GenerateFullPaths=true", 13 | "/consoleloggerparameters:NoSummary" 14 | ], 15 | "type": "process", 16 | "problemMatcher": "$msCompile" 17 | }, 18 | { 19 | "label": "build (functions)", 20 | "command": "dotnet", 21 | "options": { 22 | "cwd": "src/" 23 | }, 24 | "args": [ 25 | "build", 26 | "/property:GenerateFullPaths=true", 27 | "/consoleloggerparameters:NoSummary" 28 | ], 29 | "type": "process", 30 | "dependsOn": "clean (functions)", 31 | "group": { 32 | "kind": "build", 33 | "isDefault": true 34 | }, 35 | "problemMatcher": "$msCompile" 36 | }, 37 | { 38 | "label": "clean release (functions)", 39 | "command": "dotnet", 40 | "options": { 41 | "cwd": "src/" 42 | }, 43 | "args": [ 44 | "clean", 45 | "--configuration", 46 | "Release", 47 | "/property:GenerateFullPaths=true", 48 | "/consoleloggerparameters:NoSummary" 49 | ], 50 | "type": "process", 51 | "problemMatcher": "$msCompile" 52 | }, 53 | { 54 | "label": "publish (functions)", 55 | "command": "dotnet", 56 | "options": { 57 | "cwd": "src/" 58 | }, 59 | "args": [ 60 | "publish", 61 | "--configuration", 62 | "Release", 63 | "/property:GenerateFullPaths=true", 64 | "/consoleloggerparameters:NoSummary" 65 | ], 66 | "type": "process", 67 | "dependsOn": "clean release (functions)", 68 | "problemMatcher": "$msCompile" 69 | }, 70 
| { 71 | "type": "func", 72 | "dependsOn": "build (functions)", 73 | "options": { 74 | "cwd": "${workspaceFolder}/src/bin/Debug/net6.0" 75 | }, 76 | "command": "host start", 77 | "isBackground": true, 78 | "problemMatcher": "$func-dotnet-watch" 79 | } 80 | ] 81 | } -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/adb-to-purview.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net6.0 4 | v4 5 | Exe 6 | enable 7 | false 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | PreserveNewest 28 | 29 | 30 | PreserveNewest 31 | Never 32 | 33 | 34 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Welcome, and thank you for your interest in contributing. There are many ways to contribute: 4 | - [Submit issues](https://github.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/issues) to report bugs and make suggestions. 5 | - Review the [source code changes](https://github.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/pulls). 6 | - Contribute features and fixes by forking the repository and creating a [pull request](https://github.com/microsoft/Purview-ADB-Lineage-Solution-Accelerator/compare/main...). 7 | 8 | 9 | ## Contributing New Data Source Parsing 10 | 11 | If you are interested in supporting an additional data source, see the [extending source support](./docs/extending-source-support.md) doc. A mapping can be added to the `docs/mappings/` set of json arrays and indexed on the `docs/mappings/README.md`. 12 | 13 | If you are interested in contributing a data source to OpenLineage, please see the [OpenLineage contributing guide](https://github.com/OpenLineage/OpenLineage/blob/main/CONTRIBUTING.md). 14 | 15 | 16 | ## Contributor License Agreement 17 | 18 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 19 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 20 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 21 | 22 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 23 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 24 | provided by the bot. You will only need to do this once across all repos using our CLA. 25 | 26 | 27 | ## Microsoft Open Source Code of Conduct 28 | 29 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 30 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 31 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 32 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/README.md: -------------------------------------------------------------------------------- 1 | # Integration Tests for Purview Spark Connector 2 | 3 | The examples are intended for the internal team to verify the solution works correctly across our [limited set of scenarios](../LIMITATIONS.md). 
4 | 5 | However, they also offer a wide set of samples that you can use to test this solution with some modification. 6 | 7 | ## Cluster Environment Variables 8 | 9 | Set the following environment variables on your cluster(s) to run the tests: 10 | * STORAGE_SERVICE_NAME 11 | * SYNAPSE_STORAGE_SERVICE_NAME 12 | * SYNAPSE_SERVICE_NAME 13 | 14 | ## Notebook Jobs 15 | 16 | Most notebook jobs point to an Azure Storage account, which should be created before running the sample. 17 | 18 | In addition, some mount points and Spark SQL tables should be created in advance: 19 | 20 | ``` 21 | /mnt/rawdata 22 | /mnt/outputdata 23 | ``` 24 | 25 | ```sql 26 | CREATE TABLE testcasesixteen ( 27 | id int, 28 | postalcode string, 29 | streetaddress string, 30 | city string, 31 | stateAbbreviation string 32 | ) 33 | USING DELTA 34 | LOCATION "abfss://CONTAINER@ACCOUNT.dfs.core.windows.net/PATH/" 35 | ``` 36 | 37 | ## Wheel Jobs 38 | 39 | The wheel job samples can be built and uploaded by running: 40 | 41 | ```bash 42 | # Run this command one time to create a wheels directory 43 | # dbfs mkdirs dbfs:/wheels 44 | cd wheeljobs/abfssInAbfssOut 45 | python -m setup bdist_wheel 46 | dbfs cp ./dist/abfssintest-0.0.3-py3-none-any.whl dbfs:/wheels --overwrite 47 | ``` 48 | 49 | You will also need to create a Databricks job definition for each wheel job. A sample is at `spark-tests\wheeljobs\abfssInAbfssOut\db-job-def.json`. 50 | 51 | ## Jar Jobs 52 | 53 | The jar job can be built and uploaded by running: 54 | ```bash 55 | cd jarjobs/abfssInAbfssOut/ 56 | ./gradlew clean build 57 | dbfs cp ./app/build/libs/app.jar dbfs:/jars/abfssInAbfssOut.jar --overwrite 58 | ``` 59 | 60 | ## Data Factory Jobs 61 | 62 | Two notebooks are meant to be run from Data Factory: `call-via-adf-spark2` and `call-via-adf-spark3`. 63 | 64 | You should create a pipeline that runs these two notebooks on separate Spark2 and Spark3 clusters, as sketched below.
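
A minimal sketch of the two notebook activities such a pipeline could contain is shown below. The linked service names and notebook paths are assumptions; point them at your own Spark2 and Spark3 linked services and the workspace paths where you imported the notebooks:

```json
[
    {
        "name": "CallViaAdfSpark2",
        "type": "DatabricksNotebook",
        "linkedServiceName": {
            "referenceName": "AzureDatabricksSpark2",
            "type": "LinkedServiceReference"
        },
        "typeProperties": {
            "notebookPath": "/Shared/examples/call-via-adf-spark2"
        }
    },
    {
        "name": "CallViaAdfSpark3",
        "type": "DatabricksNotebook",
        "linkedServiceName": {
            "referenceName": "AzureDatabricksSpark3",
            "type": "LinkedServiceReference"
        },
        "typeProperties": {
            "notebookPath": "/Shared/examples/call-via-adf-spark3"
        }
    }
]
```

Each entry goes in the `activities` array of the pipeline definition, and each linked service should target a cluster running the matching Spark version.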
65 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/abfss-in-hive+notmgd+saveAsTable-out.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | // Seq( 3 | // ("someId001", "Foo", 2, true), 4 | // ("someId002", "Bar", 2, false) 5 | // ).toDF("id","name","age","isAlive") 6 | // .write.format("delta") 7 | // .save(abfssRootPath+"/testcase/abfss-in-hive+notmgd+saveAsTable-out/exampleInputA/") 8 | 9 | // COMMAND ---------- 10 | 11 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 12 | 13 | // COMMAND ---------- 14 | 15 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 16 | val storageContainerName = "rawdata" 17 | val ouptutContainerName = "outputdata" 18 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 19 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 20 | 21 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 22 | 23 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 24 | 25 | // COMMAND ---------- 26 | 27 | // %sql 28 | // DROP TABLE IF EXISTS default.abfssInHiveNotMgdSaveAsTableOut; 29 | // CREATE TABLE IF NOT EXISTS default.abfssInHiveNotMgdSaveAsTableOut ( 30 | // id String, 31 | // name String, 32 | // age Integer, 33 | // isAlive Boolean 34 | // ) 35 | // LOCATION 'abfss://outputdata@.dfs.core.windows.net/testcase/abfss-in-hive+notmgd+saveAsTable-out/notMgdTable/' 36 | 37 | // COMMAND ---------- 38 | 39 | // spark.sparkContext.setLogLevel("DEBUG") 40 | 41 | // COMMAND ---------- 42 | 43 | val exampleA = ( 44 | spark.read.format("delta") 45 | .load(abfssRootPath+"/testcase/abfss-in-hive+notmgd+saveAsTable-out/exampleInputA/") 46 | ) 47 | 48 | // COMMAND ---------- 49 | 50 | exampleA.write.mode("append").saveAsTable("abfssInHiveNotMgdSaveAsTableOut") 51 | 52 | // COMMAND ---------- 53 | 54 | // MAGIC %md 55 | // MAGIC # Explore file paths 56 | 57 | // COMMAND ---------- 58 | 59 | // val df = spark.sql("SELECT * FROM abfssInHiveNotMgdSaveAsTableOut") 60 | // df.inputFiles 61 | 62 | // COMMAND ---------- 63 | 64 | 65 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/wheeljobs/abfssInAbfssOut/abfssintest/main.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | from pyspark.sql import SparkSession 3 | from pyspark.sql.types import StructField, IntegerType, StringType, StructType 4 | from pyspark.sql import functions as pyf 5 | from pyspark.dbutils import DBUtils 6 | import os 7 | 8 | 9 | def runapp(): 10 | print("STARTING") 11 | spark = SparkSession.builder.getOrCreate() 12 | dbutils = DBUtils(spark) 13 | 14 | storageServiceName = os.environ.get("STORAGE_SERVICE_NAME") 15 | storageContainerName = "rawdata" 16 | ouptutContainerName = "outputdata" 17 | abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 18 | outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 19 | 20 | storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 21 | 22 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 23 | 24 | 25 | exampleASchema = StructType([ 26 | 
StructField("id", IntegerType(), True), 27 | StructField("postalCode", StringType(), False), 28 | StructField("streetAddress", StringType(), False) 29 | ]) 30 | 31 | exampleA = ( 32 | spark.read.format("csv") 33 | .schema(exampleASchema) 34 | .option("header", True) 35 | .load(abfssRootPath+"/testcase/seventeen/exampleInputA/exampleInputA.csv") 36 | ) 37 | 38 | 39 | exampleBSchema = StructType([ 40 | StructField("id", IntegerType(), True), 41 | StructField("city", StringType(), False), 42 | StructField("stateAbbreviation", StringType(), False) 43 | ]) 44 | 45 | exampleB = ( 46 | spark.read.format("csv") 47 | .schema(exampleBSchema) 48 | .option("header", True) 49 | .load(abfssRootPath+"/testcase/seventeen/exampleInputB/exampleInputB.csv") 50 | ) 51 | 52 | outputDf = exampleA.join(exampleB, ["id"], "inner") 53 | 54 | outputDf.repartition(1).write.mode("overwrite").format("csv").save(outputRootPath+"/testcase/seventeen/abfss-in-abfss-out-folder/") 55 | print("COMPLETED") 56 | 57 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/tests/tools/Dev_Utilities.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | # PyApacheAtlas packages 5 | # Connect to Atlas via a Service Principal 6 | from pyapacheatlas.auth import ServicePrincipalAuthentication 7 | from pyapacheatlas.core import PurviewClient 8 | from sys import exc_info 9 | from traceback import format_exception 10 | 11 | # COMMAND ---------- 12 | 13 | 14 | import localsettingsdutils as lsdu 15 | 16 | 17 | # COMMAND ---------- 18 | 19 | # Authenticate against your Atlas server 20 | oauth = ServicePrincipalAuthentication( 21 | tenant_id= lsdu.TENANT_ID, 22 | client_id=lsdu.CLIENT_ID, 23 | client_secret=lsdu.CLIENT_SECRET 24 | 25 | ) 26 | 27 | client = PurviewClient( 28 | account_name = lsdu.PURVIEW_NAME, 29 | authentication=oauth 30 | ) 31 | 32 | # COMMAND ---------- 33 | 34 | def delete_list_entities(attribute, value): 35 | search_filter = {attribute:value} 36 | results = client.discovery.search_entities("", search_filter=search_filter) 37 | for result in results: 38 | print(result['qualifiedName']) 39 | print(result['id']) 40 | print(result['entityType']) 41 | client.delete_entity(guid=result['id']) 42 | 43 | def test_list_entities(attribute, value): 44 | search_filter = {attribute:value} 45 | results = client.discovery.search_entities("", search_filter=search_filter) 46 | cnt = 0 47 | for result in results: 48 | print(result['qualifiedName']) 49 | print(result['id']) 50 | print(result['entityType']) 51 | cnt+=1 52 | print(f"num items: {cnt}") 53 | 54 | # COMMAND EXAMPLE ---------- 55 | 56 | # Note: may have to run this more than once if there are a large number of entities 57 | # test_list_entities('assetType','Purview Custom Connector') 58 | # test_list_entities('assetType','spark') 59 | # test_list_entities('assetType','Databricks') 60 | 61 | # Uncomment the below to delete all solution entities 62 | # delete_list_entities('assetType','Purview Custom Connector') 63 | # delete_list_entities('assetType','spark') 64 | delete_list_entities('assetType','Databricks') 65 | 66 | -------------------------------------------------------------------------------- /tests/integration/README.md: -------------------------------------------------------------------------------- 1 | # Running the Integration Tests 2 | 3 | The `run-test.sh` script will run each spark job as defined 
in the `jobdefs` folder. 4 | 5 | A related `*-expectations.json` file should exist for every test definition. It provides the expected qualified names to search for. 6 | 7 | ## Notes 8 | 9 | * Synapse output does not currently test the Process because the output asset is always different due to the TempFolder naming convention. 10 | * Azure Data Factory testing is not yet automated. 11 | 12 | ## Coverage 13 | 14 | |Name|Input|Output|Spark 2|Spark 3|Note| 15 | |----|----|----|----|----|----| 16 | |abfss-in-abfss-out-oauth.scala|ABFS|ABFS|✅|✅|Uses OAuth| 17 | |abfss-in-abfss-out-root.scala|ABFS|ABFS||✅|Writes to root of container| 18 | |abfss-in-abfss-out.scala|ABFS|ABFS|✅|✅|| 19 | |azuresql-in-azuresql-out.scala|AzSQL|AzSQL||✅|| 20 | |call-via-adf-spark2.scala|ABFS|ABFS|✅||Called via Azure Data Factory| 21 | |call-via-adf-spark3.scala|ABFS|ABFS||✅|Called via Azure Data Factory| 22 | |delta-in-delta-merge.scala|DELTA|DELTA|❌|❌|Uses a Merge Statement| 23 | |delta-in-delta-merge-package.py|DELTA|DELTA|❌|❌|Uses a Merge Statement| 24 | |delta-in-delta-out-abfss.scala|DELTA|DELTA||✅|| 25 | |delta-in-delta-out-fs.scala|DELTA|DELTA||✅|| 26 | |delta-in-delta-out-mnt.scala|DELTA|DELTA||✅|Uses a Mount Point| 27 | |intermix-languages.scala|ABFS|ABFS||✅|Intermixes Scala and Python| 28 | |mnt-in-mnt-out.scala|ABFS|ABFS|✅|✅|Uses a Mount Point as Output| 29 | |nested-child.scala|N/A|N/A|N/A|N/A|Called by nested-parent.scala| 30 | |nested-parent.scala|ABFS|ABFS||✅|Calls nested-child.scala| 31 | |spark-sql-table-in-abfss-out.scala|SparkSQL|ABFS|❌|❌|Queries Spark SQL table| 32 | |synapse-in-synapse-out.scala|Synapse|Synapse|❌|❌|| 33 | |synapse-in-wasbs-out.scala|Synapse|WASB||✅|| 34 | |synapse-wasbs-in-synapse-out.scala|Synapse, WASB|Synapse|✅|✅|Joins Synapse and WASB data| 35 | |wasbs-in-wasbs-out-with-param.py|WASB|WASB||✅|Passes a parameter to the job| 36 | |wasbs-in-wasbs-out.scala|WASB|WASB||✅|| 37 | |JarJob - spark_jar_task|ABFS|ABFS||✅|spark_jar_task| 38 | |pythonscript|ABFS|ABFS||✅|spark_python_task| 39 | |pythonwheel|ABFS|ABFS||✅|python_wheel_task| 40 | |spark submit task|ABFS|ABFS|❌|❌|spark_submit_task| 41 | -------------------------------------------------------------------------------- /tests/integration/jobdefs-inactive/spark2-tests-def.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "test-examples-spark-2", 3 | "email_notifications": { 4 | "no_alert_for_skipped_runs": false 5 | }, 6 | "timeout_seconds": 0, 7 | "max_concurrent_runs": 2, 8 | "tasks": [ 9 | { 10 | "task_key": "spark2-abfss-in-abfss-out", 11 | "notebook_task": { 12 | "notebook_path": "/Shared/examples/abfss-in-abfss-out" 13 | }, 14 | "existing_cluster_id": "", 15 | "timeout_seconds": 0, 16 | "email_notifications": {}, 17 | "description": "" 18 | }, 19 | { 20 | "task_key": "spark2-abfss-oauth", 21 | "depends_on": [ 22 | { 23 | "task_key": "spark2-abfss-in-abfss-out" 24 | } 25 | ], 26 | "notebook_task": { 27 | "notebook_path": "/Shared/examples/abfss-in-abfss-out-oauth" 28 | }, 29 | "existing_cluster_id": "", 30 | "timeout_seconds": 0, 31 | "email_notifications": {} 32 | }, 33 | { 34 | "task_key": "spark2-mnt", 35 | "depends_on": [ 36 | { 37 | "task_key": "spark2-abfss-oauth" 38 | } 39 | ], 40 | "notebook_task": { 41 | "notebook_path": "/Shared/examples/mnt-in-mnt-out" 42 | }, 43 | "existing_cluster_id": "", 44 | "timeout_seconds": 0, 45 | "email_notifications": {} 46 | }, 47 | { 48 | "task_key": "spark2-Synapse-wasbs-synapse", 49 | "depends_on": [ 50 | { 51 | "task_key": "spark2-mnt" 52 
| } 53 | ], 54 | "notebook_task": { 55 | "notebook_path": "/Shared/examples/synapse-wasbs-in-synapse-out" 56 | }, 57 | "existing_cluster_id": "", 58 | "timeout_seconds": 0, 59 | "email_notifications": {} 60 | } 61 | ], 62 | "format": "MULTI_TASK" 63 | } -------------------------------------------------------------------------------- /tests/integration/jobdefs-inactive/sparksubmit-test-def.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "name": "SparkSubmitForBasicApp", 4 | "email_notifications": { 5 | "no_alert_for_skipped_runs": false 6 | }, 7 | "max_concurrent_runs": 1, 8 | "tasks": [ 9 | { 10 | "task_key": "SparkSubmitForBasicApp", 11 | "spark_submit_task": { 12 | "parameters": [ 13 | "--class", 14 | "SparkApp.ReadWrite.App", 15 | "dbfs:/FileStore/testcases/rwapp.jar" 16 | ] 17 | }, 18 | "new_cluster": { 19 | "cluster_name": "", 20 | "spark_version": "9.1.x-scala2.12", 21 | "spark_conf": { 22 | "spark.openlineage.url.param.code": "{{secrets/purview-to-adb-kv/Ol-Output-Api-Key}}", 23 | "spark.openlineage.host": "https://.azurewebsites.net", 24 | "spark.openlineage.namespace": "#ABC123", 25 | "spark.openlineage.version": "1" 26 | }, 27 | "node_type_id": "Standard_DS3_v2", 28 | "enable_elastic_disk": true, 29 | "init_scripts": [ 30 | { 31 | "dbfs": { 32 | "destination": "dbfs:/databricks/openlineagehardcoded/release-candidate.sh" 33 | } 34 | } 35 | ], 36 | "azure_attributes": { 37 | "first_on_demand": 1, 38 | "availability": "ON_DEMAND_AZURE", 39 | "spot_bid_max_price": -1 40 | }, 41 | "num_workers": 1 42 | }, 43 | "max_retries": 1, 44 | "min_retry_interval_millis": 0, 45 | "retry_on_timeout": false, 46 | "timeout_seconds": 3600 47 | } 48 | ], 49 | "format": "MULTI_TASK" 50 | } 51 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/populate-data-kusto.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | spark.sparkContext.setLogLevel("ALL") 3 | 4 | // COMMAND ---------- 5 | 6 | import org.apache.commons.lang3.reflect.FieldUtils 7 | import org.apache.commons.lang3.reflect.MethodUtils 8 | import org.apache.spark.sql.execution.datasources.LogicalRelation 9 | import com.microsoft.kusto.spark.datasink.KustoSinkOptions 10 | import org.apache.spark.sql.{SaveMode, SparkSession} 11 | import com.microsoft.kusto.spark.datasource.KustoSourceOptions 12 | import org.apache.spark.SparkConf 13 | import org.apache.spark.sql._ 14 | import com.microsoft.azure.kusto.data.ClientRequestProperties 15 | import com.microsoft.kusto.spark.sql.extension.SparkExtension._ 16 | 17 | 18 | // COMMAND ---------- 19 | 20 | val appId = dbutils.secrets.get("purview-to-adb-kv", "azurekusto-appid") 21 | val appKey = dbutils.secrets.get("purview-to-adb-kv", "azurekusto-appsecret") 22 | val uri = dbutils.secrets.get("purview-to-adb-kv", "azurekusto-uri") 23 | val authorityId = dbutils.secrets.get("purview-to-adb-kv", "tenant-id") 24 | val cluster = uri.replaceAll(".kusto.windows.net", "").replaceAll("https://", "") 25 | val database = "database01" // this is hardcoded; if it is changed in the bicep template, it also needs to be changed here. 
26 | val table = "table01" 27 | 28 | // COMMAND ---------- 29 | 30 | case class City(id: String, name: String, country: String) 31 | 32 | val df = Seq(new City("1", "Milwaukee", "USA"), new City("2", "Cairo", "Egypt"), new City("3", "Doha", "Qatar"), new City("4", "Kabul", "Afghanistan")).toDF 33 | 34 | // COMMAND ---------- 35 | 36 | df.write 37 | .format("com.microsoft.kusto.spark.datasource") 38 | .option(KustoSinkOptions.KUSTO_CLUSTER, cluster) 39 | .option(KustoSinkOptions.KUSTO_DATABASE, database) 40 | .option(KustoSinkOptions.KUSTO_TABLE, table) 41 | .option(KustoSinkOptions.KUSTO_AAD_APP_ID, appId) 42 | .option(KustoSinkOptions.KUSTO_AAD_APP_SECRET, appKey) 43 | .option(KustoSinkOptions.KUSTO_AAD_AUTHORITY_ID, authorityId) 44 | .option(KustoSinkOptions.KUSTO_TABLE_CREATE_OPTIONS, "CreateIfNotExist") 45 | .mode(SaveMode.Append) 46 | .save() 47 | 48 | // COMMAND ---------- 49 | 50 | 51 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/tests/unit-tests/Function.Domain/Helpers/Parser/UnitTestData.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using Function.Domain.Models; 5 | using Function.Domain.Models.OL; 6 | using Function.Domain.Models.Settings; 7 | using Newtonsoft.Json; 8 | using System.Collections.Generic; 9 | 10 | namespace UnitTests.Function.Domain.Helpers 11 | { 12 | public static class UnitTestData 13 | { 14 | public struct QnParserTestData 15 | { 16 | public static List<MountPoint> MountPoints = new List<MountPoint>() 17 | { 18 | new MountPoint(){MountPointName="/databricks/mlflow-registry",Source="databricks/mlflow-registry"}, 19 | new MountPoint(){MountPointName="/databricks-datasets",Source="databricks-datasets"}, 20 | new MountPoint(){MountPointName="/mnt/rawdata",Source="abfss://rawdata@purviewexamplessa.dfs.core.windows.net/"}, 21 | new MountPoint(){MountPointName="/databricks/mlflow-tracking",Source="databricks/mlflow-tracking"}, 22 | new MountPoint(){MountPointName="/mnt/delta",Source="abfss://deltalake@purviewexamplessa.dfs.core.windows.net/"}, 23 | new MountPoint(){MountPointName="/mnt/outputdata",Source="abfss://outputdata@purviewexamplessa.dfs.core.windows.net/"}, 24 | new MountPoint(){MountPointName="/databricks-results",Source="databricks-results"}, 25 | new MountPoint(){MountPointName="/databricks-results",Source="databricks-results"}, 26 | new MountPoint(){MountPointName="/mnt/purview2/",Source="abfss://purview2@purviewexamplessa.dfs.core.windows.net/"}, 27 | new MountPoint(){MountPointName="/mnt/x/",Source="abfss://x@xsa.dfs.core.windows.net/"}, 28 | new MountPoint(){MountPointName="/mnt/x/y",Source="abfss://y@ysa.dfs.core.windows.net/"}, 29 | new MountPoint(){MountPointName="/mnt/x2/",Source="abfss://myx2@ysa.dfs.core.windows.net/subdir/"}, 30 | new MountPoint(){MountPointName="/mnt/blobx2/",Source="wasbs://myx2@ysa.blob.core.windows.net/subdir/"}, 31 | new MountPoint(){MountPointName="/mnt/adlg1/",Source="adl://gen1.azuredatalakestore.net/subdir/"} 32 | }; 33 | } 34 | } 35 | } -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/spark-sql-table-in-abfss-out.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | // MAGIC %md 3 | // MAGIC # Skip This 4 | 5 | // COMMAND ---------- 6 | 7 | // MAGIC %sql 8 | // MAGIC 9 | // MAGIC -- CREATE TABLE 
default.mysparktable( 10 | // MAGIC -- id int 11 | // MAGIC -- ) 12 | 13 | // COMMAND ---------- 14 | 15 | // MAGIC %python 16 | // MAGIC 17 | // MAGIC # df = spark.createDataFrame([ 18 | // MAGIC # (1,),(2,),(3,),(4,) 19 | // MAGIC # ], ["id"] 20 | // MAGIC # ) 21 | // MAGIC 22 | // MAGIC # df.createOrReplaceTempView("data") 23 | // MAGIC 24 | // MAGIC # spark.sql("INSERT INTO default.mysparktable SELECT id from data") 25 | 26 | // COMMAND ---------- 27 | 28 | // %sql 29 | // SELECT * 30 | // FROM default.mysparktable 31 | 32 | // COMMAND ---------- 33 | 34 | // MAGIC %md 35 | // MAGIC # Run from here 36 | 37 | // COMMAND ---------- 38 | 39 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 40 | 41 | // COMMAND ---------- 42 | 43 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 44 | val storageContainerName = "rawdata" 45 | val ouptutContainerName = "outputdata" 46 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 47 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 48 | 49 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 50 | 51 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 52 | 53 | // COMMAND ---------- 54 | 55 | val exampleA = ( 56 | spark.sql("SELECT id from default.mysparktable") 57 | ) 58 | 59 | 60 | val exampleBSchema = StructType( 61 | StructField("id", IntegerType, true) :: 62 | StructField("city", StringType, false) :: 63 | StructField("stateAbbreviation", StringType, false) :: Nil) 64 | 65 | val exampleB = ( 66 | spark.read.format("csv") 67 | .schema(exampleBSchema) 68 | .option("header", true) 69 | .load(abfssRootPath+"/testcase/nine/exampleInputB/exampleInputB.csv") 70 | ) 71 | 72 | // COMMAND ---------- 73 | 74 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 75 | 76 | outputDf.repartition(1).write.mode("overwrite").format("csv").save(outputRootPath+"/testcase/nine/spark-sql-table-in-abfss-out-folder/") 77 | 78 | // COMMAND ---------- 79 | 80 | -------------------------------------------------------------------------------- /tests/environment/sources/synapse.bicep: -------------------------------------------------------------------------------- 1 | @description('The Synapse Workspace name.') 2 | param workspaceName string = uniqueString('synwksp', resourceGroup().id) 3 | 4 | @description('Location for all resources.') 5 | param location string = resourceGroup().location 6 | 7 | @description('The administrator username of the SQL logical server.') 8 | @secure() 9 | param administratorLogin string 10 | 11 | @description('The administrator password of the SQL logical server.') 12 | @secure() 13 | param administratorLoginPassword string 14 | 15 | var supportingStorageName = '${workspaceName}sa' 16 | 17 | resource storageAccount 'Microsoft.Storage/storageAccounts@2021-08-01' = { 18 | name: supportingStorageName 19 | location: location 20 | sku: { 21 | name: 'Standard_LRS' 22 | } 23 | kind: 'StorageV2' 24 | properties:{ 25 | isHnsEnabled: true 26 | } 27 | 28 | } 29 | 30 | resource rawdataContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = { 31 | name: '${storageAccount.name}/default/defaultcontainer' 32 | } 33 | 34 | resource tempContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2021-08-01' = { 35 | name: '${storageAccount.name}/default/temp' 36 
| } 37 | 38 | resource synapseWorkspace 'Microsoft.Synapse/workspaces@2021-06-01' = { 39 | name: workspaceName 40 | location: location 41 | identity: { 42 | type: 'SystemAssigned' 43 | } 44 | properties: { 45 | azureADOnlyAuthentication: false 46 | defaultDataLakeStorage: { 47 | accountUrl: 'https://${storageAccount.name}.dfs.core.windows.net' 48 | createManagedPrivateEndpoint: false 49 | filesystem: 'synapsefs' 50 | resourceId: resourceId('Microsoft.Storage/storageAccounts/', storageAccount.name) 51 | } 52 | managedResourceGroupName: '${workspaceName}rg' 53 | 54 | publicNetworkAccess: 'Enabled' 55 | sqlAdministratorLogin: administratorLogin 56 | sqlAdministratorLoginPassword: administratorLoginPassword 57 | trustedServiceBypassEnabled: true 58 | } 59 | } 60 | 61 | resource sqlPool 'Microsoft.Synapse/workspaces/sqlPools@2021-06-01' = { 62 | name: 'sqlpool1' 63 | location: location 64 | sku: { 65 | name: 'DW100c' 66 | capacity: 0 67 | } 68 | parent: synapseWorkspace 69 | properties: { 70 | collation: 'SQL_Latin1_General_CP1_CI_AS' 71 | createMode: 'Default' 72 | 73 | storageAccountType: 'LRS' 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/kusto-in-wasbs-out.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | spark.sparkContext.setLogLevel("ALL") 3 | 4 | // COMMAND ---------- 5 | 6 | import org.apache.commons.lang3.reflect.FieldUtils 7 | import org.apache.commons.lang3.reflect.MethodUtils 8 | import org.apache.spark.sql.execution.datasources.LogicalRelation 9 | import com.microsoft.kusto.spark.datasink.KustoSinkOptions 10 | import org.apache.spark.sql.{SaveMode, SparkSession} 11 | import com.microsoft.kusto.spark.datasource.KustoSourceOptions 12 | import org.apache.spark.SparkConf 13 | import org.apache.spark.sql._ 14 | import com.microsoft.azure.kusto.data.ClientRequestProperties 15 | import com.microsoft.kusto.spark.sql.extension.SparkExtension._ 16 | 17 | 18 | // COMMAND ---------- 19 | 20 | val appId = dbutils.secrets.get("purview-to-adb-kv", "azurekusto-appid") 21 | val appKey = dbutils.secrets.get("purview-to-adb-kv", "azurekusto-appsecret") 22 | val uri = dbutils.secrets.get("purview-to-adb-kv", "azurekusto-uri") 23 | val authorityId = dbutils.secrets.get("purview-to-adb-kv", "tenant-id") 24 | val cluster = uri.replaceAll(".kusto.windows.net", "").replaceAll("https://", "") 25 | val database = "database01" // this is hardcoded; if it is changed in the bicep template, it also needs to be changed here. 
26 | val table = "table01" 27 | 28 | // COMMAND ---------- 29 | 30 | val conf: Map[String, String] = Map( 31 | KustoSourceOptions.KUSTO_AAD_APP_ID -> appId, 32 | KustoSourceOptions.KUSTO_AAD_APP_SECRET -> appKey, 33 | KustoSourceOptions.KUSTO_AAD_AUTHORITY_ID -> authorityId 34 | ) 35 | 36 | val df = spark.read.kusto(cluster, database, table, conf) 37 | 38 | // COMMAND ---------- 39 | 40 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 41 | val ouptutContainerName = "outputdata" 42 | 43 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 44 | 45 | spark.conf.set("fs.azure.account.key."+storageServiceName+".blob.core.windows.net", storageKey) 46 | 47 | // COMMAND ---------- 48 | 49 | val wasbsRootPath = "wasbs://"+ouptutContainerName+"@"+storageServiceName+".blob.core.windows.net" 50 | 51 | val file_location = wasbsRootPath+"/kusto/wasbs_out.csv" 52 | val file_type = "csv" 53 | 54 | // COMMAND ---------- 55 | 56 | df.write.mode("overwrite").option("header","true").csv(file_location) 57 | 58 | // COMMAND ---------- 59 | 60 | 61 | -------------------------------------------------------------------------------- /docs/release-checklist.md: -------------------------------------------------------------------------------- 1 | # Release Checklist 2 | 3 | * [ ] Create a branch to prepare for release (e.g. `feature/prepareForReleaseX-Y`). Make the following changes on the newly created branch. 4 | * [ ] Create a new aka.ms url to point to the next release (e.g. aka.ms/APFunctions{MAJOR}-{MINOR}, such as aka.ms/APFunctions2-1). 5 | * [ ] Update the [newdeploymenttemp.json](../deployment/infra/newdeploymenttemp.json) with the new aka.ms url. 6 | * [ ] Perform the Pull Request from `feature/prepareForReleaseX-Y` into `main`. 7 | * [ ] On `main`, create a git tag with the pattern X.Y.Z, where X, Y, and Z follow the [semver pattern](https://semver.org/). Then push the tag to the origin git repo (GitHub). 8 | * ```bash 9 | git tag X.Y.Z 10 | git push origin --tags 11 | ``` 12 | * This will trigger the GitHub Action. 13 | * [ ] Allow the GitHub Action to deploy to the integration environment. 14 | * [ ] Confirm the tests pass on the integration environment. 15 | * If the tests fail, you can remove the tag from your local and GitHub repo using: 16 | ```bash 17 | git push origin --delete X.Y.Z # Delete on GitHub 18 | git tag -d X.Y.Z # Delete locally 19 | ``` 20 | * Fix the errors and then repeat the steps above to recreate the tag locally and push to GitHub to restart the process. 21 | * [ ] Allow the GitHub Action to deploy to the production (release) environment. 22 | * This will generate a release that includes the function zip file. 23 | * [ ] Update the newly created aka.ms url to point to the new function zip file available on the release. 24 | * [ ] Add release notes. 25 | * If creating a new release for a major or minor version: 26 | * [ ] Create a new release branch from the last commit and name it `release/X.Y`. 27 | * [ ] [Update the default branch](https://docs.github.com/en/organizations/managing-organization-settings/managing-the-default-branch-name-for-repositories-in-your-organization) on the GitHub repo to the new release branch. 28 | 29 | 30 | ## Hotfix Checklist 31 | 32 | * [ ] Create a new branch named `hotfix/nameOfFix`. 33 | * [ ] Create a PR for each affected release branch from your hotfix branch. 34 | * [ ] Apply the PR to each affected release. 35 | * If the hotfix affects the Azure Function code: 36 | * [ ] Create a release with an updated Patch value (e.g. 
going from 1.1.0 to 1.1.1) 37 | * [ ] Manually add the FunctionZip.zip file to the release. 38 | * [ ] Update the related aka.ms url to point to the latest, fixed version. 39 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/abfss-in-abfss-out-oauth.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 3 | 4 | // COMMAND ---------- 5 | 6 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 7 | val storageContainerName = "rawdata" 8 | val ouptutContainerName = "outputdata" 9 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 10 | val outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 11 | 12 | spark.conf.set("fs.azure.account.auth.type."+storageServiceName+".dfs.core.windows.net", "OAuth") 13 | spark.conf.set("fs.azure.account.oauth.provider.type."+storageServiceName+".dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") 14 | spark.conf.set("fs.azure.account.oauth2.client.id."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientIdKey")) 15 | spark.conf.set("fs.azure.account.oauth2.client.secret."+storageServiceName+".dfs.core.windows.net", dbutils.secrets.get("purview-to-adb-kv", "clientSecretKey")) 16 | spark.conf.set("fs.azure.account.oauth2.client.endpoint."+storageServiceName+".dfs.core.windows.net", "https://login.microsoftonline.com/"+dbutils.secrets.get("purview-to-adb-kv", "tenant-id")+"/oauth2/token") 17 | 18 | 19 | // COMMAND ---------- 20 | 21 | val exampleASchema = StructType( 22 | StructField("id", IntegerType, true) :: 23 | StructField("postalCode", StringType, false) :: 24 | StructField("streetAddress", StringType, false) :: Nil) 25 | 26 | val exampleA = ( 27 | spark.read.format("csv") 28 | .schema(exampleASchema) 29 | .option("header", true) 30 | .load(abfssRootPath+"/testcase/two/exampleInputA/exampleInputA.csv") 31 | ) 32 | 33 | 34 | val exampleBSchema = StructType( 35 | StructField("id", IntegerType, true) :: 36 | StructField("city", StringType, false) :: 37 | StructField("stateAbbreviation", StringType, false) :: Nil) 38 | 39 | val exampleB = ( 40 | spark.read.format("csv") 41 | .schema(exampleBSchema) 42 | .option("header", true) 43 | .load(abfssRootPath+"/testcase/two/exampleInputB/exampleInputB.csv") 44 | ) 45 | 46 | // COMMAND ---------- 47 | 48 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 49 | 50 | outputDf.repartition(1).write.mode("overwrite").format("csv").save(outputRootPath+"/testcase/two/abfss-in-abfss-out-oauth-folder/") 51 | 52 | // COMMAND ---------- 53 | 54 | -------------------------------------------------------------------------------- /tests/integration/spark-apps/notebooks/intermix-languages.scala: -------------------------------------------------------------------------------- 1 | // Databricks notebook source 2 | import org.apache.spark.sql.types.{StructType, StructField, IntegerType, StringType} 3 | 4 | // COMMAND ---------- 5 | 6 | val storageServiceName = sys.env("STORAGE_SERVICE_NAME") 7 | val storageContainerName = "rawdata" 8 | val ouptutContainerName = "outputdata" 9 | val abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 10 | val 
outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 11 | 12 | val storageKey = dbutils.secrets.get("purview-to-adb-kv", "storage-service-key") 13 | 14 | spark.conf.set("fs.azure.account.key."+storageServiceName+".dfs.core.windows.net", storageKey) 15 | 16 | // COMMAND ---------- 17 | 18 | // MAGIC %python 19 | // MAGIC import os 20 | // MAGIC storageServiceName = os.environ.get("STORAGE_SERVICE_NAME") 21 | // MAGIC storageContainerName = "rawdata" 22 | // MAGIC ouptutContainerName = "outputdata" 23 | // MAGIC abfssRootPath = "abfss://"+storageContainerName+"@"+storageServiceName+".dfs.core.windows.net" 24 | // MAGIC outputRootPath = "abfss://"+ouptutContainerName+"@"+storageServiceName+".dfs.core.windows.net" 25 | 26 | // COMMAND ---------- 27 | 28 | val exampleASchema = StructType( 29 | StructField("id", IntegerType, true) :: 30 | StructField("postalCode", StringType, false) :: 31 | StructField("streetAddress", StringType, false) :: Nil) 32 | 33 | val exampleA = ( 34 | spark.read.format("csv") 35 | .schema(exampleASchema) 36 | .option("header", true) 37 | .load(abfssRootPath+"/testcase/fifteen/exampleInputA/exampleInputA.csv") 38 | ) 39 | 40 | 41 | val exampleBSchema = StructType( 42 | StructField("id", IntegerType, true) :: 43 | StructField("city", StringType, false) :: 44 | StructField("stateAbbreviation", StringType, false) :: Nil) 45 | 46 | val exampleB = ( 47 | spark.read.format("csv") 48 | .schema(exampleBSchema) 49 | .option("header", true) 50 | .load(abfssRootPath+"/testcase/fifteen/exampleInputB/exampleInputB.csv") 51 | ) 52 | 53 | // COMMAND ---------- 54 | 55 | val outputDf = exampleA.join(exampleB, exampleA("id") === exampleB("id"), "inner").drop(exampleB("id")) 56 | outputDf.createOrReplaceTempView("temptable") 57 | 58 | // COMMAND ---------- 59 | 60 | // MAGIC %python 61 | // MAGIC pyOutputDf = spark.sql("SELECT * FROM temptable") 62 | // MAGIC pyOutputDf.repartition(1).write.mode("overwrite").format("csv").save(outputRootPath+"/testcase/fifteen/abfss-in-abfss-out-folder/") 63 | 64 | // COMMAND ---------- 65 | 66 | -------------------------------------------------------------------------------- /tests/environment/sources/mysql.bicep: -------------------------------------------------------------------------------- 1 | @description('Server Name for Azure database for MySQL') 2 | param serverName string = uniqueString('mysql', resourceGroup().id) 3 | 4 | @description('Database administrator login name') 5 | @minLength(1) 6 | param administratorLogin string 7 | 8 | @description('Database administrator password') 9 | @minLength(8) 10 | @secure() 11 | param administratorLoginPassword string 12 | 13 | @description('Azure database for MySQL compute capacity in vCores (2,4,8,16,32)') 14 | param skuCapacity int = 2 15 | 16 | @description('Azure database for MySQL sku name ') 17 | param skuName string = 'B_Gen5_2' 18 | 19 | @description('Azure database for MySQL Sku Size ') 20 | param SkuSizeMB int = 5120 21 | 22 | @description('Azure database for MySQL pricing tier') 23 | @allowed([ 24 | 'Basic' 25 | 'GeneralPurpose' 26 | 'MemoryOptimized' 27 | ]) 28 | param SkuTier string = 'Basic' 29 | 30 | @description('Azure database for MySQL sku family') 31 | param skuFamily string = 'Gen5' 32 | 33 | @description('MySQL version') 34 | @allowed([ 35 | '5.6' 36 | '5.7' 37 | '8.0' 38 | ]) 39 | param mysqlVersion string = '8.0' 40 | 41 | @description('Location for all resources.') 42 | param location string = resourceGroup().location 43 | 44 | 
@description('MySQL Server backup retention days') 45 | param backupRetentionDays int = 7 46 | 47 | @description('Geo-Redundant Backup setting') 48 | param geoRedundantBackup string = 'Disabled' 49 | 50 | 51 | var firewallrules = [ 52 | { 53 | Name: 'rule1' 54 | StartIpAddress: '0.0.0.0' 55 | EndIpAddress: '255.255.255.255' 56 | } 57 | ] 58 | 59 | resource mysqlDbServer 'Microsoft.DBforMySQL/servers@2017-12-01' = { 60 | name: serverName 61 | location: location 62 | sku: { 63 | name: skuName 64 | tier: SkuTier 65 | capacity: skuCapacity 66 | size: '${SkuSizeMB}' // a string is expected here but an int for the storageProfile... 67 | family: skuFamily 68 | } 69 | properties: { 70 | createMode: 'Default' 71 | version: mysqlVersion 72 | administratorLogin: administratorLogin 73 | administratorLoginPassword: administratorLoginPassword 74 | storageProfile: { 75 | storageMB: SkuSizeMB 76 | backupRetentionDays: backupRetentionDays 77 | geoRedundantBackup: geoRedundantBackup 78 | } 79 | } 80 | } 81 | 82 | @batchSize(1) 83 | resource firewallRules 'Microsoft.DBforMySQL/servers/firewallRules@2017-12-01' = [for rule in firewallrules: { 84 | name: '${mysqlDbServer.name}/${rule.Name}' 85 | properties: { 86 | startIpAddress: rule.StartIpAddress 87 | endIpAddress: rule.EndIpAddress 88 | } 89 | }] 90 | -------------------------------------------------------------------------------- /function-app/adb-to-purview/src/Function.Domain/Providers/IHttpClientProvider.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | using System; 5 | using System.Collections.Generic; 6 | using System.Net.Http; 7 | using System.Net.Http.Headers; 8 | using System.Threading.Tasks; 9 | namespace Function.Domain.Providers 10 | { 11 | public interface IHttpClientManager 12 | { 13 | /// <summary> 14 | /// PostAsync with Token 15 | /// </summary> 16 | /// <param name="url"></param> 17 | /// <param name="content"></param> 18 | /// <param name="accessToken"></param> 19 | /// <param name="headers"></param> 20 | Task<HttpResponseMessage> PostAsync(Uri url, HttpContent content, AuthenticationHeaderValue accessToken, IDictionary<string, string>? headers = null); 21 | 22 | /// <summary> 23 | /// PostAsync 24 | /// </summary> 25 | /// <param name="url"></param> 26 | /// <param name="content"></param> 27 | /// <param name="accessToken"></param> 28 | /// <param name="headers"></param> 29 | /// <returns></returns> 30 | Task<HttpResponseMessage> PostAsync(Uri url, HttpContent content, string accessToken, IDictionary<string, string>? headers = null); 31 | 32 | /// <summary> 33 | /// GetAsync 34 | /// </summary> 35 | /// <param name="url"></param> 36 | /// <param name="accessToken"></param> 37 | /// <returns></returns> 38 | Task<HttpResponseMessage> GetAsync(Uri url, AuthenticationHeaderValue accessToken); 39 | 40 | /// <summary> 41 | /// DeleteAsync 42 | /// </summary> 43 | /// <param name="url"></param> 44 | /// <param name="accessToken"></param> 45 | /// <returns></returns> 46 | Task<HttpResponseMessage> DeleteAsync(Uri url, string accessToken); 47 | 48 | /// <summary> 49 | /// GetAsync 50 | /// </summary> 51 | /// <param name="url"></param> 52 | /// <param name="accessToken"></param> 53 | /// <returns></returns> 54 | Task<HttpResponseMessage> GetAsync(Uri url, string accessToken); 55 | 56 | /// <summary> 57 | /// GetAsync 58 | /// </summary> 59 | /// <param name="url"></param> 60 | /// <param name="accessToken"></param> 61 | /// <param name="headers"></param> 62 | /// <returns></returns> 63 | /// 64 | Task<HttpResponseMessage> GetAsync(string url, string accessToken, IDictionary<string, string>? headers = null); 65 | 66 | /// <summary> 67 | /// GetAsync 68 | /// </summary> 69 | /// <param name="url"></param> 70 | /// <param name="accessToken"></param> 71 | /// <param name="headers"></param> 72 | Task<HttpResponseMessage> GetAsync(string url, AuthenticationHeaderValue accessToken, IDictionary<string, string>? 
headers = null); 73 | 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /tests/integration/jobdefs/hive3-tests-expectations.json: -------------------------------------------------------------------------------- 1 | [ 2 | "databricks://.azuredatabricks.net/jobs/", 3 | "databricks://.azuredatabricks.net/jobs//tasks/hive-in-hive-out-insert", 4 | "databricks://.azuredatabricks.net/jobs//tasks/hive-in-hive-out-insert/processes/2CE3088B4BAADD102F97D92B97F3AB79->E14B63BA5130659288E6B5DB7FC7F232", 5 | "databricks://.azuredatabricks.net/notebooks/Shared/examples/hive-in-hive-out-insert", 6 | "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-hive_notmgd_saveAsTable-out", 7 | "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-hive_notmgd_saveAsTable-out/processes/575BF7CF92625D35D6B9309C9561FE0A->43E1EB2B6E2B692F3AFDDDBD63762F41", 8 | "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-hive+notmgd+saveAsTable-out", 9 | "databricks://.azuredatabricks.net/jobs//tasks/hive_abfss-in-hive_abfss-out-insert", 10 | "databricks://.azuredatabricks.net/jobs//tasks/hive_abfss-in-hive_abfss-out-insert/processes/0366CD2735F426A339DB69EBB00A6ABC->95F7EE6DC3AB03275F8FE27E98838D54", 11 | "databricks://.azuredatabricks.net/notebooks/Shared/examples/hive+abfss-in-hive+abfss-out-insert", 12 | "databricks://.azuredatabricks.net/jobs//tasks/hive_mgd_not_default-in-hive_mgd_not_default-out-insert", 13 | "databricks://.azuredatabricks.net/jobs//tasks/hive_mgd_not_default-in-hive_mgd_not_default-out-insert/processes/13AA3B6322616FF3E554C6A109EBAB5C->6FCA021CCAD4C906D5C29512215F86C9", 14 | "databricks://.azuredatabricks.net/notebooks/Shared/examples/hive+mgd+not+default-in-hive+mgd+not+default-out-insert", 15 | "databricks://.azuredatabricks.net/jobs//tasks/hive_mnt-in-hive_mnt-out-insert", 16 | "databricks://.azuredatabricks.net/jobs//tasks/hive_mnt-in-hive_mnt-out-insert/processes/0366CD2735F426A339DB69EBB00A6ABC->95F7EE6DC3AB03275F8FE27E98838D54", 17 | "databricks://.azuredatabricks.net/notebooks/Shared/examples/hive+mnt-in-hive+mnt-out-insert", 18 | "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-hive_saveAsTable-out", 19 | "databricks://.azuredatabricks.net/jobs//tasks/abfss-in-hive_saveAsTable-out/processes/D691CD0248B7A179C249AE6DA86A9A69->1073C801CC5F362F10F1CD1FFBA1972C", 20 | "databricks://.azuredatabricks.net/notebooks/Shared/examples/abfss-in-hive+saveAsTable-out" 21 | ] 22 | -------------------------------------------------------------------------------- /tests/environment/sources/postgres.bicep: -------------------------------------------------------------------------------- 1 | @description('Server Name for Azure database for PostgreSQL') 2 | param serverName string = uniqueString('postgres', resourceGroup().id) 3 | 4 | @description('Database administrator login name') 5 | @minLength(1) 6 | param administratorLogin string 7 | 8 | @description('Database administrator password') 9 | @minLength(8) 10 | @secure() 11 | param administratorLoginPassword string 12 | 13 | @description('Azure database for PostgreSQL compute capacity in vCores (2,4,8,16,32)') 14 | param skuCapacity int = 2 15 | 16 | @description('Azure database for PostgreSQL sku name') 17 | param skuName string = 'B_Gen5_2' 18 | 19 | @description('Azure database for PostgreSQL Sku Size') 20 | param skuSizeMB int = 5120 21 | 22 | @description('Azure database for PostgreSQL pricing tier') 23 | @allowed([ 24 | 'Basic' 25 | 'GeneralPurpose' 26 | 'MemoryOptimized' 27 | ]) 
28 | param skuTier string = 'Basic' 29 | 30 | @description('Azure database for PostgreSQL sku family') 31 | param skuFamily string = 'Gen5' 32 | 33 | @description('PostgreSQL version') 34 | @allowed([ 35 | '9.5' 36 | '9.6' 37 | '10' 38 | '10.0' 39 | '10.2' 40 | '11' 41 | ]) 42 | param postgresqlVersion string = '11' 43 | 44 | @description('Location for all resources.') 45 | param location string = resourceGroup().location 46 | 47 | @description('PostgreSQL Server backup retention days') 48 | param backupRetentionDays int = 7 49 | 50 | @description('Geo-Redundant Backup setting') 51 | param geoRedundantBackup string = 'Disabled' 52 | 53 | var firewallrules = [ 54 | { 55 | Name: 'rule1' 56 | StartIpAddress: '0.0.0.0' 57 | EndIpAddress: '255.255.255.255' 58 | } 59 | ] 60 | 61 | resource server 'Microsoft.DBforPostgreSQL/servers@2017-12-01' = { 62 | name: serverName 63 | location: location 64 | sku: { 65 | name: skuName 66 | tier: skuTier 67 | capacity: skuCapacity 68 | size: '${skuSizeMB}' 69 | family: skuFamily 70 | } 71 | properties: { 72 | createMode: 'Default' 73 | version: postgresqlVersion 74 | administratorLogin: administratorLogin 75 | administratorLoginPassword: administratorLoginPassword 76 | storageProfile: { 77 | storageMB: skuSizeMB 78 | backupRetentionDays: backupRetentionDays 79 | geoRedundantBackup: geoRedundantBackup 80 | } 81 | } 82 | 83 | } 84 | 85 | @batchSize(1) 86 | resource firewallRules 'Microsoft.DBforPostgreSQL/servers/firewallRules@2017-12-01' = [for rule in firewallrules: { 87 | name: '${server.name}/${rule.Name}' 88 | properties: { 89 | startIpAddress: rule.StartIpAddress 90 | endIpAddress: rule.EndIpAddress 91 | } 92 | }] 93 | -------------------------------------------------------------------------------- /assets/img/extend-source/QualifiedName.svg: -------------------------------------------------------------------------------- 1 | "qualifiedName": "mssql://{nameSpcBodyParts[0]}/{nameSpcNameVals['database']}/{nameGroups[0].parts[0]}/{nameGroups[0].parts[1]}","purviewDataType": "azure_synapse_dedicated_sql_table","QualifiedName":"mssql://purviewadbsynapsews.sql.azuresynapse.net/SQLPool1/sales/region" --------------------------------------------------------------------------------