├── CODE_OF_CONDUCT.md ├── LICENSE ├── Purview-API-Powershell.pdf ├── PurviewSDKSearchEntitySemaphore.cs ├── Purview_API_Reference.csv ├── Purview_API_Reference_Parameters.csv ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── azure-purview-rest-api-specs.zip ├── purview_atlas_eventhub_sample.py └── purview_atlas_eventhub_sample_list_classifications_AB.py /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /Purview-API-Powershell.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Azure-Purview-API-PowerShell/a9f87fe672f53e78025400d0bf4c2cdfc956fa5c/Purview-API-Powershell.pdf -------------------------------------------------------------------------------- /PurviewSDKSearchEntitySemaphore.cs: -------------------------------------------------------------------------------- 1 | using Azure; 2 | using Azure.Analytics.Purview.Catalog; 3 | using Azure.Core; 4 | using DGCM.Purview.Engine.Models; 5 | using System; 6 | using System.Collections.Concurrent; 7 | using System.Collections.Generic; 8 | using System.Linq; 9 | using System.Net; 10 | using System.Text.Json; 11 | using System.Threading; 12 | using System.Threading.Tasks; 13 | namespace DGCM.Purview.Engine.Services { 14 | public class PurviewService : IPurviewService { 15 | private readonly PurviewCatalogClient _purviewClient; 16 | public readonly int MAX_RESULTS_PER_PAGE = 1000; 17 | public PurviewService(PurviewCatalogClient purviewClient) { 18 | _purviewClient = purviewClient; 19 | } 20 | public async Task SearchByKeywords(string keywords, 21 | int limit = 50, 22 | int offset = 0) { 23 | var searchSchema = new PurviewSearchParameters { 24 | Keywords = keywords, 25 | Limit = limit, 26 | Offset = offset, 27 | }; 28 | var serializedSearchSchema = RequestContent.Create(searchSchema); 29 | var response = await _purviewClient.SearchAsync(serializedSearchSchema); 30 | if (response.Status != (int)HttpStatusCode.OK) { 31 | throw new Exception("Purview's SearchAsync has failed!"); 32 | } 33 | 
return await JsonSerializer.DeserializeAsync( 34 | response.Content.ToStream()); 35 | } 36 | public async Task GetAll() { 37 | var result = await SearchByKeywords("*", MAX_RESULTS_PER_PAGE, 0); 38 | if (result.SearchCount < MAX_RESULTS_PER_PAGE) { 39 | return result; 40 | } 41 | int remainingPages = 42 | (int)Math.Ceiling(result.SearchCount / (double)MAX_RESULTS_PER_PAGE) - 43 | 1; // TODO: Remove this after Purview fixes the offset limit. 44 | remainingPages = Math.Min(remainingPages, 100); 45 | var searchTasks = 46 | (Enumerable.Range(1, remainingPages)).Select(async i => { 47 | var page = await SearchByKeywords("*", MAX_RESULTS_PER_PAGE, 48 | i * MAX_RESULTS_PER_PAGE); 49 | result.Value.AddRange(page.Value); 50 | }); 51 | await Task.WhenAll(searchTasks); 52 | return result; 53 | } /// 54 | public async Task GetEntityById(string id) { 55 | try { 56 | var response = await _purviewClient.Entities.GetByGuidAsync( 57 | id, new RequestOptions()); 58 | return await JsonSerializer.DeserializeAsync( 59 | response.Content.ToStream()); 60 | } catch (RequestFailedException ex) { 61 | throw new RequestFailedException( 62 | "Purview's GetByGuidAsync has failed! 
Does the provided GUID exist?", 63 | ex); 64 | } 65 | } 66 | public async Task> GetAllEnriched( 67 | Action progressCallback = null, int maxConcurrentTasks = 250) { 68 | progressCallback("starting Purview search"); 69 | var purviewSearchResult = await GetAll(); 70 | progressCallback( 71 | $"Purview search done: {purviewSearchResult.Value.Count} results fetched"); 72 | var completeEntities = new ConcurrentBag(); 73 | using (var semaphore = 74 | new SemaphoreSlim(initialCount: maxConcurrentTasks)) { 75 | int count = 0; 76 | var t0 = DateTime.Now.TimeOfDay; 77 | progressCallback("starting GetEntityById"); 78 | ConcurrentBag tasks = new ConcurrentBag(); 79 | foreach (var item in purviewSearchResult.Value) { 80 | semaphore.Wait(); 81 | var t = Task.Factory.StartNew(async () => { 82 | try { 83 | var getByIdResult = await GetEntityById(item.Id); 84 | completeEntities.Add( 85 | PurviewCompleteEntity.MergeEntityAndSearchResult( 86 | getByIdResult.Entity, item)); 87 | } catch (RequestFailedException) { 88 | var getByIdResult = new PurviewGetByGuidResult() { 89 | Entity = new PurviewEntity { Guid = item.Id } 90 | }; 91 | completeEntities.Add( 92 | PurviewCompleteEntity.MergeEntityAndSearchResult( 93 | getByIdResult.Entity, item)); 94 | } finally { 95 | if (count % 100 == 0 && count != 0) { 96 | progressCallback( 97 | $"{count}/{purviewSearchResult.Value.Count} details fetched - {(DateTime.Now.TimeOfDay - t0).TotalSeconds}s"); 98 | t0 = DateTime.Now.TimeOfDay; 99 | } 100 | count++; 101 | semaphore.Release(); 102 | } 103 | }); 104 | tasks.Add(t); 105 | } 106 | await Task.WhenAll(tasks); 107 | } 108 | return completeEntities; 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /Purview_API_Reference.csv: -------------------------------------------------------------------------------- 1 | Category,Command,Method,APIURIDomain,APIURIPath,APIVersion 2 | 
management,checkNameAvailability,POST,management.azure.com,/subscriptions/{subscriptionId}/providers/Microsoft.Purview/checkNameAvailability?accountName={accountName},api-version=2021-07-01 3 | management,createAccount,PUT,management.azure.com,/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Purview/accounts/{accountName},api-version=2021-07-01 4 | management,defaultAccount,GET,management.azure.com,/providers/Microsoft.Purview/getDefaultAccount,api-version=2021-07-01 5 | management,deleteAccount,DELETE,management.azure.com,/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Purview/accounts/{accountName},api-version=2021-07-01 6 | management,deletePrivateEndpoint,DELETE,management.azure.com,/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Purview/accounts/{accountName}/privateEndpointConnections/{privateEndpointConnectionName},api-version=2021-07-01 7 | management,listKeys,POST,management.azure.com,/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Purview/accounts/{accountName}/listkeys,api-version=2021-07-01 8 | management,listOperations,GET,management.azure.com,/providers/Microsoft.Purview/operations,api-version=2021-07-01 9 | management,listPrivateLinkResources,GET,management.azure.com,/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Purview/accounts/{accountName}/privateLinkResources/{groupId},api-version=2021-07-01 10 | management,putPrivateEndpoint,PUT,management.azure.com,/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Purview/accounts/{accountName}/privateEndpointConnections/{privateEndpointConnectionName},api-version=2021-07-01 11 | management,readAccount,GET,management.azure.com,/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Purview/accounts/{accountName},api-version=2021-07-01 12 | 
management,readAccounts,GET,management.azure.com,/subscriptions/{subscriptionId}/providers/Microsoft.Purview/accounts,api-version=2021-07-01 13 | management,readPrivateEndpoint,GET,management.azure.com,/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Purview/accounts/{accountName}/privateEndpointConnections/{privateEndpointConnectionName},api-version=2021-07-01 14 | management,readPrivateEndpoints,GET,management.azure.com,/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Purview/accounts/{accountName}/privateEndpointConnections,api-version=2021-07-01 15 | management,removeDefaultAccount,POST,management.azure.com,/providers/Microsoft.Purview/removeDefaultAccount,api-version=2021-07-01 16 | management,setDefaultAccount,POST,management.azure.com,/providers/Microsoft.Purview/setDefaultAccount,api-version=2021-07-01 17 | management,updateAccount,PATCH,management.azure.com,/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Purview/accounts/{accountName},api-version=2021-07-01 18 | credential,delete,DELETE,.proxy.purview.azure.com,/credentials/{credentialName},api-version=2021-07-01 19 | credential,put,PUT,.proxy.purview.azure.com,/credentials/{credentialName},api-version=2021-07-01 20 | credential,read,GET,.proxy.purview.azure.com,/credentials/{credentialName},api-version=2021-07-01 21 | entity,create,POST,.purview.azure.com,/catalog/atlas/v2/entity,api-version=2021-07-01 22 | entity,createBulk,POST,.purview.azure.com,/catalog/atlas/v2/entity/bulk,api-version=2021-07-01 23 | entity,createBulkClassification,POST,.purview.azure.com,/catalog/atlas/v2/entity/bulk/classification,api-version=2021-07-01 24 | entity,createBulkSetClassifications,POST,.purview.azure.com,/catalog/atlas/v2/entity/bulk/setClassifications,api-version=2021-07-01 25 | entity,createClassifications,POST,.purview.azure.com,/catalog/atlas/v2/entity/guid/{guid}/classifications,api-version=2021-07-01 
26 | entity,createUniqueAttributeClassifications,POST,.purview.azure.com,/catalog/atlas/v2/entity/uniqueAttribute/type/{typeName}/classifications,api-version=2021-07-01 27 | entity,delete,DELETE,.purview.azure.com,/catalog/atlas/v2/entity/guid/{guid},api-version=2021-07-01 28 | entity,deleteBulk,DELETE,.purview.azure.com,/catalog/atlas/v2/entity/bulk,api-version=2021-07-01 29 | entity,deleteClassification,DELETE,.purview.azure.com,/catalog/atlas/v2/entity/guid/{guid}/classification/{classificationName},api-version=2021-07-01 30 | entity,deleteUniqueAttribute,DELETE,.purview.azure.com,/catalog/atlas/v2/entity/uniqueAttribute/type/{typeName},api-version=2021-07-01 31 | entity,deleteUniqueAttributeClassification,DELETE,.purview.azure.com,/catalog/atlas/v2/entity/uniqueAttribute/type/{typeName}/classification/{classificationName},api-version=2021-07-01 32 | entity,put,PUT,.purview.azure.com,/catalog/atlas/v2/entity/guid/{guid},api-version=2021-07-01 33 | entity,putClassifications,PUT,.purview.azure.com,/catalog/atlas/v2/entity/guid/{guid}/classifications,api-version=2021-07-01 34 | entity,putUniqueAttribute,PUT,.purview.azure.com,/catalog/atlas/v2/entity/uniqueAttribute/type/{typeName},api-version=2021-07-01 35 | entity,putUniqueAttributeClassifications,PUT,.purview.azure.com,/catalog/atlas/v2/entity/uniqueAttribute/type/{typeName}/classifications,api-version=2021-07-01 36 | entity,read,GET,.purview.azure.com,/catalog/atlas/v2/entity/guid/{guid},api-version=2021-07-01 37 | entity,readBulk,GET,.purview.azure.com,/catalog/atlas/v2/entity/bulk,api-version=2021-07-01 38 | entity,readBulkUniqueAttribute,GET,.purview.azure.com,/catalog/atlas/v2/entity/bulk/uniqueAttribute/type/{typeName},api-version=2021-07-01 39 | entity,readClassification,GET,.purview.azure.com,/catalog/atlas/v2/entity/guid/{guid}/classification/{classificationName},api-version=2021-07-01 40 | 
entity,readClassifications,GET,.purview.azure.com,/catalog/atlas/v2/entity/guid/{guid}/classifications,api-version=2021-07-01 41 | entity,readHeader,GET,.purview.azure.com,/catalog/atlas/v2/entity/guid/{guid}/header,api-version=2021-07-01 42 | entity,readUniqueAttribute,GET,.purview.azure.com,/catalog/atlas/v2/entity/uniqueAttribute/type/{typeName}?qualifiedName={qualifiedName},api-version=2021-07-01 43 | search,search,POST,.purview.azure.com,/catalog/atlas/v2/search/query?keywords={keywords}&limit={limit}&offset={offset}&filterJSON={filterJSON}&facetsJSON={facetsJSON},api-version=2021-07-01 44 | search,searchAdvanced,POST,.purview.azure.com,/catalog/api/search/advanced?keywords={keywords},api-version=2021-07-01 45 | search,searchSuggest,POST,.purview.azure.com,/catalog/api/search/suggest?keywords={keywords},api-version=2021-07-01 46 | search,searchAutocomplete,GET,.purview.azure.com,/catalog/api/search/autocomplete?keyword={keyword},api-version=2021-07-01 47 | glossary,create,POST,.purview.azure.com,/catalog/api/atlas/v2/glossary,api-version=2021-07-01 48 | glossary,createCategories,POST,.purview.azure.com,/catalog/api/atlas/v2/glossary/categories,api-version=2021-07-01 49 | glossary,createCategory,POST,.purview.azure.com,/catalog/api/atlas/v2/glossary/category,api-version=2021-07-01 50 | glossary,createTerm,POST,.purview.azure.com,/catalog/api/atlas/v2/glossary/term,api-version=2021-07-01 51 | glossary,createTerms,POST,.purview.azure.com,/catalog/api/atlas/v2/glossary/terms,api-version=2021-07-01 52 | glossary,createTermsAssignedEntities,POST,.purview.azure.com,/catalog/api/atlas/v2/glossary/terms/{termGuid}/assignedEntities,api-version=2021-07-01 53 | glossary,createTermsExport,POST,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid}/terms/export?termGuid={termGuid},api-version=2021-07-01 54 | glossary,createTermsImport,POST,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid}/terms/import,api-version=2021-07-01 55 | 
glossary,delete,DELETE,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid},api-version=2021-07-01 56 | glossary,deleteCategory,DELETE,.purview.azure.com,/catalog/api/atlas/v2/glossary/category/{categoryGuid},api-version=2021-07-01 57 | glossary,deleteTerm,DELETE,.purview.azure.com,/catalog/api/atlas/v2/glossary/term/{termGuid},api-version=2021-07-01 58 | glossary,deleteTermsAssignedEntities,DELETE,.purview.azure.com,/catalog/api/atlas/v2/glossary/terms/{termGuid}/assignedEntities,api-version=2021-07-01 59 | glossary,update,PUT,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid},api-version=2021-07-01 60 | glossary,putCategory,PUT,.purview.azure.com,/catalog/api/atlas/v2/glossary/category/{categoryGuid},api-version=2021-07-01 61 | glossary,putCategoryPartial,PUT,.purview.azure.com,/catalog/api/atlas/v2/glossary/category/{categoryGuid}/partial,api-version=2021-07-01 62 | glossary,putPartial,PUT,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid}/partial,api-version=2021-07-01 63 | glossary,putTerm,PUT,.purview.azure.com,/catalog/api/atlas/v2/glossary/term/{termGuid},api-version=2021-07-01 64 | glossary,putTermPartial,PUT,.purview.azure.com,/catalog/api/atlas/v2/glossary/term/{termGuid}/partial,api-version=2021-07-01 65 | glossary,putTermsAssignedEntities,PUT,.purview.azure.com,/catalog/api/atlas/v2/glossary/terms/{termGuid}/assignedEntities,api-version=2021-07-01 66 | glossary,readAll,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary,api-version=2021-07-01 67 | glossary,readOneGlossary,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid},api-version=2021-07-01 68 | glossary,readDetailed,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid}/detailed,api-version=2021-07-01 69 | glossary,readCategories,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid}/categories,api-version=2021-07-01 70 | 
glossary,readCategoriesHeaders,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid}/categories/headers,api-version=2021-07-01 71 | glossary,readCategory,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/category/{categoryGuid},api-version=2021-07-01 72 | glossary,readCategoryRelated,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/category/{categoryGuid}/related,api-version=2021-07-01 73 | glossary,readCategoryTerms,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/category/{categoryGuid}/terms,api-version=2021-07-01 74 | glossary,readTerm,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/term/{termGuid},api-version=2021-07-01 75 | glossary,readTerms,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid}/terms,api-version=2021-07-01 76 | glossary,readTermsAssignedEntities,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/terms/{termGuid}/assignedEntities,api-version=2021-07-01 77 | glossary,readTermsHeaders,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/{glossaryGuid}/terms/headers,api-version=2021-07-01 78 | glossary,readTermsImport,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/terms/import/{operationGuid},api-version=2021-07-01 79 | glossary,readTermsRelated,GET,.purview.azure.com,/catalog/api/atlas/v2/glossary/terms/{termGuid}/related,api-version=2021-07-01 80 | insight,fileExtensions,POST,.guardian.purview.azure.com,/reports/fileExtensions,api-version=2021-07-01 81 | insight,graphql,POST,.guardian.purview.azure.com,/graphql,api-version=2021-07-01 82 | insight,assetDataSources,POST,.purview.azure.com,/mapanddiscover/reports/asset2/dataSources,api-version=2021-07-01 83 | insight,assetDistribution,GET,.purview.azure.com,/mapanddiscover/reports/asset2/assetDistribution/getSnapshot,api-version=2021-07-01 84 | insight,fileTypeSizeTimeSeries,POST,.purview.azure.com,/mapanddiscover/reports/asset2/fileTypeSizeTimeSeries,api-version=2021-07-01 85 | 
insight,filesWithoutResourceSet,GET,.purview.azure.com,/mapanddiscover/reports/asset2/filesWithoutResourceSet/getSnapshot,api-version=2021-07-01 86 | insight,scanStatusSummaries,GET,.purview.azure.com,/mapanddiscover/reports/scanstatus2/summaries,api-version=2021-07-01 87 | insight,scanStatusSummariesByTs,GET,.purview.azure.com,/mapanddiscover/reports/scanstatus2/summariesbyts,api-version=2021-07-01 88 | insight,topFileTypesBySize,POST,.purview.azure.com,/mapanddiscover/reports/asset2/topFileTypesBySize,api-version=2021-07-01 89 | lineage,read,GET,.purview.azure.com,/catalog/api/atlas/v2/lineage/{guid},api-version=2021-07-01 90 | lineage,readNext,GET,.purview.azure.com,/catalog/api/atlas/v2/lineage/{guid}/next/,api-version=2021-07-01 91 | relationship,create,POST,.purview.azure.com,/catalog/api/atlas/v2/relationship,api-version=2021-07-01 92 | relationship,delete,DELETE,.purview.azure.com,/catalog/api/atlas/v2/relationship/guid/{guid},api-version=2021-07-01 93 | relationship,update,PUT,.purview.azure.com,/catalog/api/atlas/v2/relationship,api-version=2021-07-01 94 | relationship,read,GET,.purview.azure.com,/catalog/api/atlas/v2/relationship/guid/{guid},api-version=2021-07-01 95 | scan,cancelScan,POST,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName}/runs/{runId}/:cancel,api-version=2018-12-01-preview 96 | scan,deleteClassificationRule,DELETE,.purview.azure.com,/scan/classificationrules/{classificationRuleName},api-version=2018-12-01-preview 97 | scan,deleteDataSource,DELETE,.purview.azure.com,/scan/datasources/{dataSourceName},api-version=2018-12-01-preview 98 | scan,deleteKeyVault,DELETE,.purview.azure.com,/scan/azureKeyVaults/{keyVaultName},api-version=2018-12-01-preview 99 | scan,deleteScan,DELETE,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName},api-version=2018-12-01-preview 100 | scan,deleteScanRuleset,DELETE,.purview.azure.com,/scan/scanrulesets/{scanRulesetName},api-version=2018-12-01-preview 101 | 
scan,deleteTrigger,DELETE,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName}/triggers/default,api-version=2018-12-01-preview 102 | scan,putClassificationRule,PUT,.purview.azure.com,/scan/classificationrules/{classificationRuleName},api-version=2018-12-01-preview 103 | scan,putDataSource,PUT,.purview.azure.com,/scan/datasources/{dataSourceName},api-version=2018-12-01-preview 104 | scan,putFilter,PUT,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName}/filters/custom,api-version=2018-12-01-preview 105 | scan,putKeyVault,PUT,.purview.azure.com,/scan/azureKeyVaults/{keyVaultName},api-version=2018-12-01-preview 106 | scan,putScan,PUT,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName},api-version=2018-12-01-preview 107 | scan,putScanRuleset,PUT,.purview.azure.com,/scan/scanrulesets/{scanRulesetName},api-version=2018-12-01-preview 108 | scan,putTrigger,PUT,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName}/triggers/default,api-version=2018-12-01-preview 109 | scan,readClassificationRule,GET,.purview.azure.com,/scan/classificationrules/{classificationRuleName},api-version=2018-12-01-preview 110 | scan,readClassificationRuleVersions,GET,.purview.azure.com,/scan/classificationrules/{classificationRuleName}/versions,api-version=2018-12-01-preview 111 | scan,readClassificationRules,GET,.purview.azure.com,/scan/classificationrules,api-version=2018-12-01-preview 112 | scan,readDatasource,GET,.purview.azure.com,/scan/datasources/{dataSourceName}/scans,api-version=2018-12-01-preview 113 | scan,readDatasources,GET,.purview.azure.com,/scan/datasources,api-version=2018-12-01-preview 114 | scan,readFilters,GET,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName}/filters/custom,api-version=2018-12-01-preview 115 | scan,readKeyVault,GET,.purview.azure.com,/scan/azureKeyVaults/{keyVaultName},api-version=2018-12-01-preview 116 | 
scan,readKeyVaults,GET,.purview.azure.com,/scan/azureKeyVaults,api-version=2018-12-01-preview 117 | scan,readScan,GET,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName},api-version=2018-12-01-preview 118 | scan,readScanHistory,GET,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName}/runs,api-version=2018-12-01-preview 119 | scan,readScanRuleset,GET,.purview.azure.com,/scan/scanrulesets/{scanRulesetName},api-version=2018-12-01-preview 120 | scan,readScanRulesets,GET,.purview.azure.com,/scan/scanrulesets,api-version=2018-12-01-preview 121 | scan,readScans,GET,.purview.azure.com,/scan/datasources/{dataSourceName}/scans,api-version=2018-12-01-preview 122 | scan,readSystemScanRuleset,GET,.purview.azure.com,/scan/systemScanRulesets/datasources/{dataSourceType},api-version=2018-12-01-preview 123 | scan,readSystemScanRulesetLatest,GET,.purview.azure.com,/scan/systemScanRulesets/versions/latest,api-version=2018-12-01-preview 124 | scan,readSystemScanRulesetVersion,GET,.purview.azure.com,/scan/systemScanRulesets/versions/{version}?dataSourceType={dataSourceType},api-version=2018-12-01-preview 125 | scan,readSystemScanRulesetVersions,GET,.purview.azure.com,/scan/systemScanRulesets/versions?dataSourceType={dataSourceType},api-version=2018-12-01-preview 126 | scan,readSystemScanRulesets,GET,.purview.azure.com,/scan/systemScanRulesets,api-version=2018-12-01-preview 127 | scan,readTrigger,GET,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName}/triggers/default,api-version=2018-12-01-preview 128 | scan,createTrigger,PUT,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName}/triggers/default,api-version=2018-12-01-preview 129 | scan,deleteTrigger,DELETE,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName}/triggers/default,api-version=2018-12-01-preview 130 | scan,runScan,PUT,.purview.azure.com,/scan/datasources/{dataSourceName}/scans/{scanName}/runs/{uuid},api-version=2018-12-01-preview 131 | 
types,createTypeDefs,POST,.purview.azure.com,/catalog/api/atlas/v2/types/typedefs,api-version=2021-07-01 132 | types,deleteTypeDef,DELETE,.purview.azure.com,/catalog/api/atlas/v2/types/typedef/name/{name},api-version=2021-07-01 133 | types,deleteTypeDefs,DELETE,.purview.azure.com,/catalog/api/atlas/v2/types/typedefs,api-version=2021-07-01 134 | types,putTypeDefs,PUT,.purview.azure.com,/catalog/api/atlas/v2/types/typedefs,api-version=2021-07-01 135 | types,readClassificationDef,GET,.purview.azure.com,/catalog/api/atlas/v2/types/classificationdef/{typeDefKey}/{typeDefVal},api-version=2021-07-01 136 | types,readEntityDef,GET,.purview.azure.com,/catalog/api/atlas/v2/types/entitydef/{typeDefKey}/{typeDefVal},api-version=2021-07-01 137 | types,readEnumDef,GET,.purview.azure.com,/catalog/api/atlas/v2/types/enumdef/{typeDefKey}/{typeDefVal},api-version=2021-07-01 138 | types,readRelationshipDef,GET,.purview.azure.com,/catalog/api/atlas/v2/types/relationshipdef/{typeDefKey}/{typeDefVal},api-version=2021-07-01 139 | types,readStatistics,GET,.purview.azure.com,/catalog/api/atlas/v2/types/statistics,api-version=2021-07-01 140 | types,readStructDef,GET,.purview.azure.com,/catalog/api/atlas/v2/types/structdef/{typeDefKey}/{typeDefVal},api-version=2021-07-01 141 | types,readTermTemplateDef,GET,.purview.azure.com,/catalog/api/atlas/v2/types/termtemplatedef/{typeDefKey}/{typeDefVal},api-version=2021-07-01 142 | types,readTypeDef,GET,.purview.azure.com,/catalog/api/atlas/v2/types/typedef/{typeDefKey}/{typeDefVal},api-version=2021-07-01 143 | types,readTypeDefs,GET,.purview.azure.com,/catalog/api/atlas/v2/types/typedefs,api-version=2021-07-01 144 | types,readTypeDefsHeaders,GET,.purview.azure.com,/catalog/api/atlas/v2/types/typedefs/headers,api-version=2021-07-01 145 | metadatapolicy,readMetadataRoles,GET,.purview.azure.com,/policystore/metadataroles,api-version=2021-07-01 146 | 
metadatapolicy,readMetadataPolicyByCollection,GET,.purview.azure.com,/policystore/collections/{collectionName}/metadataPolicy,api-version=2021-07-01 147 | metadatapolicy,readMetadataPolicyByPolicyID,GET,.purview.azure.com,/policystore/metadataPolicies/{policyId},api-version=2021-07-01 148 | metadatapolicy,readAllMetadataPolicies,GET,.purview.azure.com,/policystore/metadataPolicies,api-version=2021-07-01 149 | metadatapolicy,putMetadataPolicy,PUT,.purview.azure.com,/policystore/metadataPolicies/{policyId},api-version=2021-07-01 150 | collections,readAllCollections,GET,.purview.azure.com,/account/collections,api-version=2019-11-01-preview 151 | collections,createOrUpdateCollection,PUT,.purview.azure.com,/account/collections/{collectionName6Chars},api-version=2019-11-01-preview 152 | collections,deleteCollection,DELETE,.purview.azure.com,/account/collections/{collectionName6Chars},api-version=2019-11-01-preview 153 | -------------------------------------------------------------------------------- /Purview_API_Reference_Parameters.csv: -------------------------------------------------------------------------------- 1 | API_PARAMETER,TYPE,DESCRIPTION 2 | {accountName},[string],Thenameoftheaccount 3 | {categoryGuid},[string],Thegloballyuniqueidentifierofthecategory 4 | {classificationName},[string],Thenameoftheclassification 5 | {classificationRuleName},[string],Nameoftheclassificationrule 6 | {dataSource},[string],Specifyadatasource(AzureBlobStorage|aws|AzureDataLakeStorageGen2) 7 | {dataSourceName},[string],Nameofthedatasource 8 | {dataSourceType},[string],Typeofdatasource 9 | {depth},[integer],Thenumberofhopsforlineage[default:3] 10 | {direction},[string],Thedirectionofthelineagewhichcouldbe:INPUT:OUTPUT:BOTH[default:BOTH] 11 | {extendedInfo},[boolean],Limitswhetherincludesextendedinformation[default:false] 12 | {extInfo},[boolean],extInfo[default:false] 13 | {facets-file},[string],Filepathtoafacetsjsonfile 14 | 
{fileType},[string],Specifyafiletype(csv|avro|parquet|json|snappy|pptx|docx|xlsx)[default:csv] 15 | {filter-file},[string],Filepathtoafilterjsonfile 16 | {glossaryGuid},[string],Thegloballyuniqueidentifierforglossary 17 | {glossaryName},[string],Thenameoftheglossary 18 | {groupId},[string],Thegroupidentifier 19 | {guid},[string],GloballyUniqueIdentifier(XXXXXXXXXX-YYYYY-ZZZZZZ-AAAAAAAAAAAAAA]Format 20 | {ignoreRelationships},[boolean],Whethertoignorerelationshipattributes[default:false] 21 | {includeTermHierarchy},[boolean],Includetermtemplatereferences[default:false] 22 | {includeTermTemplate},[boolean],Whethertoincludetermtemplatedef[default:false] 23 | {keyword},[string],KeywordsToSearch 24 | {keywords},[string],SearchByKeyword 25 | {keyVaultName},[string],Nameofthekeyvault 26 | {limit},[integer],Thepagesize-bydefaultthereisnopaging[default:1000] 27 | {minExtInfo},[boolean],Whethertoreturnminimalinformationforreferredentities[default:false] 28 | {name},[string],Thenameoftheattribute 29 | {numberOfDays},[integer],Trailingtimeperiodindays[default:30] 30 | {offset},[integer],Offsetforpaginationpurpose[default:0] 31 | {operationGuid},[string],Thegloballyuniqueidentifierforasyncoperation/job 32 | {payload-file},[string],FilepathtoavalidJSONdocument 33 | {privateEndpointConnectionName},[string],Thenameoftheprivateendpointconnection 34 | {purviewName},[string],AzurePurviewaccountname 35 | {qualifiedName},[string],Thequalifiednameoftheentity 36 | {resourceGroupName},[string],Thenameoftheresourcegroup 37 | {runId},[string],Theuniqueidentifieroftherun 38 | {scanName},[string],Nameofthescan 39 | {scanRulesetName},[string],Nameofthescanruleset 40 | {scope},[string],ThescopeobjectID(egsubIDortenantID) 41 | {scopeTenantId},[string],Thescopetenantinwhichthedefaultaccountisset 42 | {scopeType},[string],Thescopewherethedefaultaccountisset(TenantorSubscription) 43 | {sort},[string],ASCorDESC[default:ASC] 44 | {subscriptionId},[string],ThesubscriptionID 45 | 
{takeTopCount},[integer],Specifythemaximumnumberofrecordstoreturn[default:10] 46 | {termGuid},[string],Thegloballyuniqueidentifierforglossaryterm 47 | {type},[string],Typedefnameassearchfilter(classification|entity|enum|relationship|struct) 48 | {typeDefKey},[string],TypedefKey 49 | {typeDefVal},[string],TypedefVal 50 | {typeName},[string],Thenameofthetype 51 | {width},[integer],Thenumberofmaxexpandingwidthinlineage[default:6] 52 | {policyId},[string],PolicyIDForAuthZfineGrainedMetadataPolicyAPIs 53 | {collectionName},[string],CollectionNameForAuthZfineGrainedMetadataPolicyAPIs 54 | {boolean},[boolean],bool[true:false] 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Azure-Purview-API-via-PowerShell 2 | 3 | ************ 4 | **Execute Azure Purview RESTful APIs via PowerShell** 5 | ***************************************************** 6 | - Azure Purview REST APIs via Powershell. Based on Microsoft Official [Azure Purview REST API Documentation](https://docs.microsoft.com/en-us/rest/api/purview/) 7 | 8 | ## Download & Installation 9 | - Download & Install The Script : https://aka.ms/Purview-API-PS 10 | - Note: You need Powershell v7.x.x. Please [Download and upgrade your Powershell to v7](https://docs.microsoft.com/en-us/powershell/scripting/install/installing-powershell-core-on-windows?view=powershell-7.1). 11 | 12 | ## Usage Steps 13 | - Open PowerShell on your Windows machine. Press "Windows" Key, type powershell, click "Run as administrator". 14 | - On powershell prompt, enter "cd ~/Documents" to change to your User Home/Documents Folder. You may move to any other folder of your choice but make sure you have write permissions on whichever directory you choose. 15 | - Then run the following command(s) to execute the Purview API Utility Script. 16 | - There are two modes of operation: 17 | 1. *Interactive Mode* 18 | 2. 
*Batch Mode* 19 | 20 | 21 | ## Interactive & UI Mode: Usage 22 | - In the Interactive & UI Mode, a lot of interactive inline help and prompts are provided to make the tool friendly and easy to use. In this mode, any of the APIs listed here: [Purview_API_Reference.csv](https://github.com/Azure/Azure-Purview-API-PowerShell/blob/main/Purview_API_Reference.csv) can be executed. However, when you need to write batch scripts or automation for Purview, or even scheduled cron jobs at regular intervals, you must use the Batch Mode (detailed in the next section). 23 | - Note: Interactive Mode restricts you to the APIs listed in the CSV only since it builds the menu items from the CSV. However, in the Batch Mode, any other APIs not listed in the CSV, or any other variations of the APIs or even additional parameters can be supplied. 24 | - Anyone running the tool for the first time, or still getting familiar with it, may prefer Interactive Mode. 25 | - For those who use this utility regularly, the Batch Mode described below is recommended. 26 | ```PowerShell 27 | PS >> Purview-API-PowerShell -PurviewAccountName {your_purview_account_name} 28 | ``` 29 | 30 | ## Batch, Scripting & Automation Mode: Usage Examples 31 | - Run Azure Purview APIs directly without any interactive help or prompts. 32 | - Useful when building scripts or automation or scheduled cron jobs. 33 | - There are a few sample APIs shown via the commands below but you may execute any other Purview API from the Microsoft Official [Azure Purview REST API Documentation](https://docs.microsoft.com/en-us/rest/api/purview/). All APIs given in Official Purview API Docs are supported by this script in Batch Mode. 
34 | ```PowerShell 35 | # Example - Get All TypeDefs In The Purview Account 36 | PS >> Purview-API-PowerShell -APIDirect -HTTPMethod GET -PurviewAPIDirectURL "https://{your-purview-account-name}.purview.azure.com/catalog/api/atlas/v2/types/typedefs?api-version=2021-07-01" -InputFile inputfile.json -OutputFile outputfile.json 37 | # Example - Get AzureKeyVaults For Registering Scans 38 | PS >> Purview-API-PowerShell -APIDirect -HTTPMethod GET -PurviewAPIDirectURL "https://{your-purview-account-name}.purview.azure.com/scan/azurekeyvaults?api-version=2021-07-01" 39 | # Example - Get All DataSources Registered In The Purview Account 40 | PS >> Purview-API-PowerShell -APIDirect -HTTPMethod GET -PurviewAPIDirectURL "https://{your-purview-account-name}.purview.azure.com/scan/datasources?api-version=2021-07-01" 41 | # Example - Get All Scan Rule Sets - User Defined Only 42 | PS >> Purview-API-PowerShell -APIDirect -HTTPMethod GET -PurviewAPIDirectURL "https://{your-purview-account-name}.purview.azure.com/scan/scanrulesets?api-version=2021-07-01" 43 | # Example - DELETE Collection 44 | PS >> Purview-API-PowerShell -APIDirect -HTTPMethod DELETE -PurviewAPIDirectURL "https://{your-purview-account-name}.purview.azure.com/account/collections/{6-char-collection-id}?&api-version=2019-11-01-preview" 45 | ``` 46 | - Note: The InputFile and OutputFile parameters are not mandatory but are recommended. 47 | - InputFile: For most PUT and POST APIs (-HTTPMethod PUT or -HTTPMethod POST) you will notice from the Purview API Documentation that a JSON body needs to be sent with the API Request. In these POST and PUT scenarios it is recommended that you create a JSON file and supply its name in the InputFile parameter. It is the file name on your local drive that contains the JSON to be sent as request body with the API invocation. 48 | - OutputFile: Name of the file that contains the output of the API in JSON format. The OutputFile parameter is not mandatory. 
If you do not supply OutputFile, do not worry, the script will automatically generate one file named "purview-api-output-{todays-date-and-time}.json" in your current directory. 49 | 50 | ## Samples & Usage Presentation 51 | [Purview-API-Powershell.pdf](https://github.com/Azure/Azure-Purview-API-PowerShell/blob/main/Purview-API-Powershell.pdf) 52 | 53 | ## Videos 54 | ### Installation Video [![Installation](https://www.powershellgallery.com/Content/Images/Branding/packageDefaultIcon.svg)](https://youtu.be/rrTYnEqPHgM) 55 | 56 | ### Interactive Mode: Video [![Interactive Mode](https://www.powershellgallery.com/Content/Images/Branding/packageDefaultIcon.svg)](https://youtu.be/0M6jRG77Wt8) 57 | 58 | ### Batch Script Automation Mode: Video [![Batch Mode](https://www.powershellgallery.com/Content/Images/Branding/packageDefaultIcon.svg)](https://youtu.be/VDkAFIG7Ii0) 59 | 60 | ************ 61 | **Benefits** 62 | ************ 63 | - While you can very well use cURL or Postman to Invoke Azure Purview APIs as well, it is generally cumbersome to extract the Azure OAuth2 *access_token* and use it appropriately in scripts or even during one-time API execution. 64 | - Powershell has very strong and user friendly integration interfaces with Azure Cloud and hence it makes it really useful to have a way to execute Azure Purview service via its APIs on Windows PowerShell. 65 | 66 | 67 | ## Contributing 68 | 69 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 70 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 71 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 72 | 73 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 74 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 75 | provided by the bot. 
You will only need to do this once across all repos using our CLA. 76 | 77 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 78 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 79 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 80 | 81 | ## Trademarks 82 | 83 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 84 | trademarks or logos is subject to and must follow 85 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 86 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 87 | Any use of third-party trademarks or logos are subject to those third-party's policies. 88 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 
8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 
36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). 7 | - **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 
26 | -------------------------------------------------------------------------------- /azure-purview-rest-api-specs.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/Azure-Purview-API-PowerShell/a9f87fe672f53e78025400d0bf4c2cdfc956fa5c/azure-purview-rest-api-specs.zip -------------------------------------------------------------------------------- /purview_atlas_eventhub_sample.py: -------------------------------------------------------------------------------- 1 | ### 2 | # Purview Eventhub Sample. This tool shows how to read Purview's EventHub and catch realtime Kafka notifications from the EventHub in Atlas Notifications (https://atlas.apache.org/2.0.0/Notifications.html) format. 3 | # author: github 4 | # contact : arindamba@microsoft.com 5 | # date : October 10, 2021 6 | # Azure Purview Product Group, Microsoft 7 | ### 8 | 9 | from azure.eventhub.aio import EventHubConsumerClient 10 | import asyncio 11 | import json 12 | import sys 13 | sys.excepthook = lambda *args: None 14 | 15 | connection_str = 'Endpoint=sb://atlas-xxxxxxx.servicebus.windows.net/;SharedAccessKeyName=AlternateSharedAccessKey;SharedAccessKey=XXXXYYYYYYZZZZZZ' #Copy this value from Azure Purview portal -> Purview account -> Properties -> "Atlas Kafka endpoint primary/secondary connection string" 16 | consumer_group = '$Default' 17 | eventhub_entities_name = 'atlas_entities' 18 | eventhub_hook_name = 'atlas_hook' 19 | 20 | async def on_event_entities (partition_context, event): 21 | print("Received the event: JSON: from the EventHub: ATLAS_ENTITIES / Partition ID: ", partition_context.partition_id) 22 | msgjson = json.dumps(json.loads(event.body_as_str(encoding='UTF-8')), indent=4, sort_keys=True) 23 | print(msgjson) 24 | await partition_context.update_checkpoint(event) 25 | 26 | async def on_event_hook (partition_context, event): 27 | print("Received the event: JSON: from the EventHub: ATLAS_HOOK / Partition ID: ", 
partition_context.partition_id) 28 | msgjson = json.dumps(json.loads(event.body_as_str(encoding='UTF-8')), indent=4, sort_keys=True) 29 | print(msgjson) 30 | await partition_context.update_checkpoint(event) 31 | 32 | async def receive(): 33 | client_entities = EventHubConsumerClient.from_connection_string(connection_str, consumer_group, eventhub_name=eventhub_entities_name) 34 | async with client_entities: 35 | await client_entities.receive( 36 | on_event=on_event_entities, 37 | starting_position="-1", 38 | ) 39 | client_hook = EventHubConsumerClient.from_connection_string(connection_str, consumer_group, eventhub_name=eventhub_hook_name) 40 | async with client_hook: 41 | await client_hook.receive( 42 | on_event=on_event_hook, 43 | starting_position="-1", 44 | ) 45 | 46 | if __name__ == '__main__': 47 | loop = asyncio.get_event_loop() 48 | try: 49 | loop.run_until_complete(receive()) 50 | except KeyboardInterrupt as e: 51 | loop.stop() 52 | 53 | #await client.get_partition_ids())) 54 | -------------------------------------------------------------------------------- /purview_atlas_eventhub_sample_list_classifications_AB.py: -------------------------------------------------------------------------------- 1 | ### 2 | # Purview Eventhub Sample. This tool shows how to read Purview's EventHub and catch realtime Kafka notifications from the EventHub in Atlas Notifications (https://atlas.apache.org/2.0.0/Notifications.html) format. 
3 | # author: github 4 | # contact : arindamba@microsoft.com 5 | # date : December, 2021 6 | # Azure Purview Product Group, Microsoft 7 | ### 8 | 9 | from azure.eventhub.aio import EventHubConsumerClient 10 | import asyncio 11 | import json 12 | import sys 13 | import csv 14 | 15 | csvfilename = 'C:/users/arind/Documents/out.txt.csv' 16 | connection_str = 'Endpoint=sb://atlas-xxxxxxx.servicebus.windows.net/;SharedAccessKeyName=AlternateSharedAccessKey;SharedAccessKey=XXXXYYYYYYZZZZZZ' #Copy this value from Azure Purview portal -> Purview account -> Properties -> "Atlas Kafka endpoint primary/secondary connection string" 17 | consumer_group = '$Default' 18 | eventhub_entities_name = 'atlas_entities' 19 | eventhub_hook_name = 'atlas_hook' 20 | 21 | message_counter = 0 22 | message_with_classifications_counter = 0 23 | 24 | 25 | def processmessage(msgjsonraw): 26 | global message_counter 27 | global message_with_classifications_counter 28 | global csvfilename 29 | message_counter = message_counter + 1 30 | jsondata = json.loads(msgjsonraw) 31 | print(jsondata['message']['entity']) 32 | msgjson = json.dumps(jsondata, indent=4, sort_keys=True) 33 | print(msgjson) 34 | if 'message' in jsondata: 35 | if 'entity' in jsondata['message']: 36 | if 'classifications' in jsondata['message']['entity']: 37 | message_with_classifications_counter = message_with_classifications_counter + 1 38 | classificationscount = len(jsondata['message']['entity']['classifications']) 39 | classifications = "" 40 | for I in range(classificationscount): 41 | classifications = classifications + jsondata['message']['entity']['classifications'][I]['typeName'] + " ---&--- " 42 | guid = jsondata['message']['entity']['guid'] 43 | fqn = jsondata['message']['entity']['attributes']['qualifiedName'] 44 | print("Classifications Detected [%3d] :----> " %(classificationscount)) 45 | print("Classifications :-----> ", (classifications)) 46 | print("FQN - Full Qualified Name Path of the Asset :-----> ", (fqn)) 47 | 
with open(csvfilename, 'a+') as csvfile: 48 | fieldnames = ["GUID", "FQN_Fully_Qualified_Name_Asset", "#ClassificationsCount","Classifications", "JSON"] 49 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 50 | writer.writerow({"GUID":guid, "FQN_Fully_Qualified_Name_Asset":fqn, "#ClassificationsCount":classificationscount, "Classifications":classifications, "JSON":msgjson}) 51 | print("-------------------------------------------------------------------------------------------------------------------") 52 | 53 | 54 | async def on_event_entities(partition_context, event): 55 | print("Received the event: JSON: from the EventHub: ATLAS_ENTITIES / Partition ID: ", 56 | partition_context.partition_id) 57 | msgjsonraw = event.body_as_str(encoding='UTF-8') 58 | processmessage(msgjsonraw) 59 | await partition_context.update_checkpoint(event) 60 | 61 | 62 | async def on_event_hook(partition_context, event): 63 | print("Received the event: JSON: from the EventHub: ATLAS_HOOK / Partition ID: ", 64 | partition_context.partition_id) 65 | msgjsonraw = event.body_as_str(encoding='UTF-8') 66 | processmessage(msgjsonraw) 67 | await partition_context.update_checkpoint(event) 68 | 69 | 70 | async def receive(): 71 | client_entities = EventHubConsumerClient.from_connection_string( 72 | connection_str, consumer_group, eventhub_name=eventhub_entities_name) 73 | async with client_entities: 74 | await client_entities.receive( 75 | on_event=on_event_entities, 76 | starting_position="-1", 77 | ) 78 | client_hook = EventHubConsumerClient.from_connection_string( 79 | connection_str, consumer_group, eventhub_name=eventhub_hook_name) 80 | async with client_hook: 81 | await client_hook.receive( 82 | on_event=on_event_hook, 83 | starting_position="-1", 84 | ) 85 | 86 | 87 | if __name__ == '__main__': 88 | sys.stdout = open('C:/users/arind/Documents/out.txt', 'w') 89 | loop = asyncio.get_event_loop() 90 | with open(csvfilename, 'w') as csvfile: 91 | fieldnames = ["GUID", 
"FQN_Fully_Qualified_Name_Asset", "#ClassificationsCount","Classifications", "JSON"] 92 | writer = csv.DictWriter(csvfile, fieldnames=fieldnames) 93 | writer.writeheader() 94 | try: 95 | loop.run_until_complete(receive()) 96 | except KeyboardInterrupt as e: 97 | loop.stop() 98 | finally: 99 | sys.stdout = open('C:/users/arind/Documents/out_summary.txt', 'w') 100 | print("Summary :-----> Total %20d Entity Messages. " %(message_counter)) 101 | print(message_with_classifications_counter, " [ ", message_with_classifications_counter/message_counter*100, " % ] Entities With Classifications.") 102 | print(message_counter-message_with_classifications_counter, " [ ", (1-message_with_classifications_counter/message_counter)*100, " % ] Entities Without Classifications.") 103 | 104 | # await client.get_partition_ids())) 105 | --------------------------------------------------------------------------------