├── .github └── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── check-storage-usage ├── .gitignore ├── CheckStorageUsage.cs ├── Properties │ ├── serviceDependencies.json │ └── serviceDependencies.local.json ├── README.md ├── check-storage-usage.csproj ├── check-storage-usage.sln ├── host.json └── local.settings.json ├── data-lake-gen2-acl-indexing ├── DataLakeGen2ACLIndexing.csproj ├── Program.cs ├── README.md ├── SampleData │ ├── Files for Organization.txt │ ├── Private │ │ └── confidential.txt │ ├── Shared Documents │ │ └── public.txt │ └── User Documents │ │ ├── Alice │ │ ├── alice-secret.txt │ │ └── alice.txt │ │ ├── Bob │ │ ├── Reports │ │ │ ├── a.txt │ │ │ ├── b.txt │ │ │ └── c.txt │ │ ├── Sales │ │ │ ├── d.txt │ │ │ └── e.txt │ │ └── bob.txt │ │ └── John │ │ ├── Documents │ │ ├── a.txt │ │ └── b.txt │ │ └── john.txt └── appsettings.json ├── export-data ├── .gitignore ├── README.md ├── Sample │ ├── Configuration.cs │ ├── Document.cs │ ├── Program.cs │ ├── Sample.csproj │ └── local.settings-example.json ├── export-data.sln ├── export-data │ ├── Bound.cs │ ├── ContinuousExporter.cs │ ├── Exporter.cs │ ├── FilePartitionWriter.cs │ ├── IPartitionWriter.cs │ ├── Partition.cs │ ├── PartitionExporter.cs │ ├── PartitionFile.cs │ ├── PartitionGenerator.cs │ ├── Program.cs │ ├── Util.cs │ └── export-data.csproj └── tests │ ├── MockPartitionWriter.cs │ ├── PartitionExporterTests.cs │ ├── Usings.cs │ ├── config.example.json │ └── tests.csproj ├── index-backup-restore ├── .gitignore ├── README.md ├── v10 │ ├── AzureSearchBackupRestoreIndex.sln │ └── AzureSearchBackupRestoreIndex │ │ ├── AzureSearchBackupRestoreIndex.csproj │ │ ├── AzureSearchHelper.cs │ │ ├── Program.cs │ │ └── appsettings.json └── v11 │ ├── AzureSearchBackupRestoreIndex.sln │ └── AzureSearchBackupRestoreIndex │ ├── AzureSearchBackupRestoreIndex.csproj │ ├── AzureSearchHelper.cs │ ├── Program.cs │ └── appsettings.json └── search-aggregations ├── Program.cs ├── README.md ├── appsettings.json └── search-aggregations.csproj /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to azure-search-dotnet-utilities 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 5 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 6 | 7 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 8 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 9 | provided by the bot. You will only need to do this once across all repos using our CLA. 
10 | 11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 14 | 15 | - [Code of Conduct](#coc) 16 | - [Issues and Bugs](#issue) 17 | - [Feature Requests](#feature) 18 | - [Submission Guidelines](#submit) 19 | 20 | ## Code of Conduct 21 | Help us keep this project open and inclusive. Please read and follow our [Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 22 | 23 | ## Found an Issue? 24 | If you find a bug in the source code or a mistake in the documentation, you can help us by 25 | [submitting an issue](#submit-issue) to the GitHub Repository. Even better, you can 26 | [submit a Pull Request](#submit-pr) with a fix. 27 | 28 | ## Want a Feature? 29 | You can *request* a new feature by [submitting an issue](#submit-issue) to the GitHub 30 | Repository. If you would like to *implement* a new feature, please submit an issue with 31 | a proposal for your work first, to be sure that we can use it. 32 | 33 | * **Small Features** can be crafted and directly [submitted as a Pull Request](#submit-pr). 34 | 35 | ## Submission Guidelines 36 | 37 | ### Submitting an Issue 38 | Before you submit an issue, search the archive; your question may already have been answered. 39 | 40 | If your issue appears to be a bug and hasn't been reported, open a new issue. 41 | Help us to maximize the effort we can spend fixing issues and adding new 42 | features by not reporting duplicate issues. Providing the following information will increase the 43 | chances of your issue being dealt with quickly: 44 | 45 | * **Overview of the Issue** - if an error is being thrown, a non-minified stack trace helps 46 | * **Version** - what version is affected (e.g. 0.1.2) 47 | * **Motivation for or Use Case** - explain what you are trying to do and why the current behavior is a bug for you 48 | * **Browsers and Operating System** - is this a problem with all browsers? 49 | * **Reproduce the Error** - provide a live example or an unambiguous set of steps 50 | * **Related Issues** - has a similar issue been reported before? 51 | * **Suggest a Fix** - if you can't fix the bug yourself, perhaps you can point to what might be 52 | causing the problem (line of code or commit) 53 | 54 | You can file new issues by providing the above information at the corresponding repository's issues link: https://github.com/[organization-name]/[repository-name]/issues/new. 55 | 56 | ### Submitting a Pull Request (PR) 57 | Before you submit your Pull Request (PR), consider the following guidelines: 58 | 59 | * Search the repository (https://github.com/[organization-name]/[repository-name]/pulls) for an open or closed PR 60 | that relates to your submission. You don't want to duplicate effort. 61 | 62 | * Make your changes in a new git fork. 63 | 64 | * Commit your changes using a descriptive commit message. 65 | * Push your fork to GitHub. 66 | * In GitHub, create a pull request. 67 | * If we suggest changes then: 68 | * Make the required updates. 69 | * Rebase your fork and force push to your GitHub repository (this will update your Pull Request): 70 | 71 | ```shell 72 | git rebase master -i 73 | git push -f 74 | ``` 75 | 76 | That's it! Thank you for your contribution!
77 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # C# utility code samples for Azure AI Search 2 | 3 | This repository contains C# code samples that help you perform specific tasks, such as checking storage or exporting content from an index. 4 | 5 | ## In this repository 6 | 7 | | Sample | Description | 8 | |--------|-------------| 9 | | check-storage-usage | Checks storage usage of an Azure AI Search service on a schedule. You can modify this sample to [adjust the service's capacity](https://docs.microsoft.com/azure/search/search-capacity-planning) or send an alert when the storage usage exceeds a predefined threshold. | 10 | | data-lake-gen2-acl-indexing | Proof-of-concept console app that demonstrates how to index a subset of your Azure Data Lake Gen2 data by using access control lists to allow certain files and directories to be accessed by an indexer in Azure AI Search. The indexer connection to Azure Data Lake Gen2 uses a managed identity and role assignments for selective data access. The sample loads data and sets up permissions programmatically, and then runs the indexer to create and load a search index. | 11 | | export-data | A console application that exports data from an Azure AI Search service. | 12 | | index-backup-restore | A console app that backs up an index (schema and documents) to your local computer and then uses the stored backup to recreate the index in a target search service that you specify. | 13 | | search-aggregations | Proof-of-concept console app that demonstrates how aggregations can be computed from random data, and how the data can be filtered using a query. | 14 | 15 | ## More resources 16 | 17 | + See [.NET samples in Azure AI Search](https://learn.microsoft.com/azure/search/samples-dotnet) for a comprehensive list of all Azure AI Search code samples that run on .NET. 18 | 19 | + See [Azure AI Search documentation](https://learn.microsoft.com/azure/search) for product documentation.
20 | -------------------------------------------------------------------------------- /check-storage-usage/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | bld/ 21 | [Bb]in/ 22 | [Oo]bj/ 23 | [Ll]og/ 24 | 25 | # Visual Studio 2015 cache/options directory 26 | .vs/ 27 | # Uncomment if you have tasks that create the project's static files in wwwroot 28 | #wwwroot/ 29 | 30 | # MSTest test Results 31 | [Tt]est[Rr]esult*/ 32 | [Bb]uild[Ll]og.* 33 | 34 | # NUNIT 35 | *.VisualState.xml 36 | TestResult.xml 37 | 38 | # Build Results of an ATL Project 39 | [Dd]ebugPS/ 40 | [Rr]eleasePS/ 41 | dlldata.c 42 | 43 | # DNX 44 | project.lock.json 45 | project.fragment.lock.json 46 | artifacts/ 47 | 48 | *_i.c 49 | *_p.c 50 | *_i.h 51 | *.ilk 52 | *.meta 53 | *.obj 54 | *.pch 55 | *.pdb 56 | *.pgc 57 | *.pgd 58 | *.rsp 59 | *.sbr 60 | *.tlb 61 | *.tli 62 | *.tlh 63 | *.tmp 64 | *.tmp_proj 65 | *.log 66 | *.vspscc 67 | *.vssscc 68 | .builds 69 | *.pidb 70 | *.svclog 71 | *.scc 72 | 73 | # Chutzpah Test files 74 | _Chutzpah* 75 | 76 | # Visual C++ cache files 77 | ipch/ 78 | *.aps 79 | *.ncb 80 | *.opendb 81 | *.opensdf 82 | *.sdf 83 | *.cachefile 84 | *.VC.db 85 | *.VC.VC.opendb 86 | 87 | # Visual Studio profiler 88 | *.psess 89 | *.vsp 90 | *.vspx 91 | *.sap 92 | 93 | # TFS 2012 Local Workspace 94 | $tf/ 95 | 96 | # Guidance Automation Toolkit 97 | *.gpState 98 | 99 | # ReSharper is a .NET coding add-in 100 | _ReSharper*/ 101 | *.[Rr]e[Ss]harper 102 | *.DotSettings.user 103 | 104 | # JustCode is a .NET coding add-in 105 | .JustCode 106 | 107 | # TeamCity is a build add-in 108 | _TeamCity* 109 | 110 | # DotCover is a Code Coverage Tool 111 | *.dotCover 112 | 113 | # NCrunch 114 | _NCrunch_* 115 | .*crunch*.local.xml 116 | nCrunchTemp_* 117 | 118 | # MightyMoose 119 | *.mm.* 120 | AutoTest.Net/ 121 | 122 | # Web workbench (sass) 123 | .sass-cache/ 124 | 125 | # Installshield output folder 126 | [Ee]xpress/ 127 | 128 | # DocProject is a documentation generator add-in 129 | DocProject/buildhelp/ 130 | DocProject/Help/*.HxT 131 | DocProject/Help/*.HxC 132 | DocProject/Help/*.hhc 133 | DocProject/Help/*.hhk 134 | DocProject/Help/*.hhp 135 | DocProject/Help/Html2 136 | DocProject/Help/html 137 | 138 | # Click-Once directory 139 | publish/ 140 | 141 | # Publish Web Output 142 | *.[Pp]ublish.xml 143 | *.azurePubxml 144 | # TODO: Comment the next line if you want to checkin your web deploy settings 145 | # but database connection strings (with potential passwords) will be unencrypted 146 | #*.pubxml 147 | *.publishproj 148 | 149 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 150 | # checkin your Azure Web App publish settings, but sensitive information contained 151 | # in these scripts will be unencrypted 152 | PublishScripts/ 153 | 154 | # NuGet Packages 155 | *.nupkg 156 | # The packages folder can be ignored because of Package Restore 157 | **/packages/* 158 | # except build/, which is used as an MSBuild target. 
159 | !**/packages/build/ 160 | # Uncomment if necessary however generally it will be regenerated when needed 161 | #!**/packages/repositories.config 162 | # NuGet v3's project.json files produces more ignoreable files 163 | *.nuget.props 164 | *.nuget.targets 165 | 166 | # Microsoft Azure Build Output 167 | csx/ 168 | *.build.csdef 169 | 170 | # Microsoft Azure Emulator 171 | ecf/ 172 | rcf/ 173 | 174 | # Windows Store app package directories and files 175 | AppPackages/ 176 | BundleArtifacts/ 177 | Package.StoreAssociation.xml 178 | _pkginfo.txt 179 | 180 | # Visual Studio cache files 181 | # files ending in .cache can be ignored 182 | *.[Cc]ache 183 | # but keep track of directories ending in .cache 184 | !*.[Cc]ache/ 185 | 186 | # Others 187 | ClientBin/ 188 | ~$* 189 | *~ 190 | *.dbmdl 191 | *.dbproj.schemaview 192 | *.jfm 193 | *.pfx 194 | *.publishsettings 195 | node_modules/ 196 | orleans.codegen.cs 197 | 198 | # Since there are multiple workflows, uncomment next line to ignore bower_components 199 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 200 | #bower_components/ 201 | 202 | # RIA/Silverlight projects 203 | Generated_Code/ 204 | 205 | # Backup & report files from converting an old project file 206 | # to a newer Visual Studio version. Backup files are not needed, 207 | # because we have git ;-) 208 | _UpgradeReport_Files/ 209 | Backup*/ 210 | UpgradeLog*.XML 211 | UpgradeLog*.htm 212 | 213 | # SQL Server files 214 | *.mdf 215 | *.ldf 216 | 217 | # Business Intelligence projects 218 | *.rdl.data 219 | *.bim.layout 220 | *.bim_*.settings 221 | 222 | # Microsoft Fakes 223 | FakesAssemblies/ 224 | 225 | # GhostDoc plugin setting file 226 | *.GhostDoc.xml 227 | 228 | # Node.js Tools for Visual Studio 229 | .ntvs_analysis.dat 230 | 231 | # Visual Studio 6 build log 232 | *.plg 233 | 234 | # Visual Studio 6 workspace options file 235 | *.opt 236 | 237 | # Visual Studio LightSwitch build output 238 | **/*.HTMLClient/GeneratedArtifacts 239 | **/*.DesktopClient/GeneratedArtifacts 240 | **/*.DesktopClient/ModelManifest.xml 241 | **/*.Server/GeneratedArtifacts 242 | **/*.Server/ModelManifest.xml 243 | _Pvt_Extensions 244 | 245 | # Paket dependency manager 246 | .paket/paket.exe 247 | paket-files/ 248 | 249 | # FAKE - F# Make 250 | .fake/ 251 | 252 | # JetBrains Rider 253 | .idea/ 254 | *.sln.iml 255 | 256 | # CodeRush 257 | .cr/ 258 | 259 | # Python Tools for Visual Studio (PTVS) 260 | __pycache__/ 261 | *.pyc -------------------------------------------------------------------------------- /check-storage-usage/CheckStorageUsage.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Threading.Tasks; 4 | using Azure; 5 | using Azure.Communication.Email; 6 | using Azure.Communication.Email.Models; 7 | using Azure.Search.Documents.Indexes; 8 | using Azure.Search.Documents.Indexes.Models; 9 | using Microsoft.Azure.WebJobs; 10 | using Microsoft.Extensions.Logging; 11 | 12 | namespace check_storage_usage 13 | { 14 | public class CheckStorageUsage 15 | { 16 | // Run on a timer every 30 minutes 17 | // https://docs.microsoft.com/azure/azure-functions/functions-bindings-timer 18 | [FunctionName("CheckStorageUsage")] 19 | public async Task Run([TimerTrigger("0 */30 * * * *")]TimerInfo timer, ILogger log) 20 | { 21 | string serviceName = Environment.GetEnvironmentVariable("ServiceName"); 22 | log.LogInformation($"Checking search storage usage for {serviceName}: 
{DateTime.Now}"); 23 | 24 | string serviceAdminApiKey = Environment.GetEnvironmentVariable("ServiceAdminApiKey"); 25 | // Storage used percentage threshold is a number between 0 and 1 representing how much storage should be 26 | // used before alerting 27 | // Example: 0.8 = 80% 28 | float storageUsedPercentThreshold = float.Parse(Environment.GetEnvironmentVariable("StorageUsedPercentThreshold")); 29 | 30 | var searchIndexClient = new SearchIndexClient(new Uri($"https://{serviceName}.search.windows.net"), new AzureKeyCredential(serviceAdminApiKey)); 31 | SearchServiceStatistics statistics = await searchIndexClient.GetServiceStatisticsAsync(); 32 | float storagedUsedPercent = (float)statistics.Counters.StorageSizeCounter.Usage / (float)statistics.Counters.StorageSizeCounter.Quota; 33 | 34 | if (storagedUsedPercent > storageUsedPercentThreshold) 35 | { 36 | string connectionString = Environment.GetEnvironmentVariable("CommunicationServicesConnectionString"); 37 | var emailClient = new EmailClient(connectionString); 38 | 39 | string subject = string.Format("Low storage space on search service {0}", serviceName); 40 | string body = string.Format("Search service {0} is using {1:P2} of its storage which exceeds the alerting threshold of {2:P2}", serviceName, storagedUsedPercent, storageUsedPercentThreshold); 41 | EmailContent emailContent = new EmailContent(subject); 42 | emailContent.PlainText = body; 43 | string toEmailAddress = Environment.GetEnvironmentVariable("ToEmailAddress"); 44 | string fromEmailAddress = Environment.GetEnvironmentVariable("FromEmailAddress"); 45 | List emailAddresses = new List { new EmailAddress(toEmailAddress) }; 46 | EmailRecipients emailRecipients = new EmailRecipients(emailAddresses); 47 | EmailMessage emailMessage = new EmailMessage(fromEmailAddress, emailContent, emailRecipients); 48 | Response response = emailClient.Send(emailMessage); 49 | log.LogInformation("Sent email about low storage, status code {0}", response.GetRawResponse().Status); 50 | } 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /check-storage-usage/Properties/serviceDependencies.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "appInsights1": { 4 | "type": "appInsights" 5 | }, 6 | "storage1": { 7 | "type": "storage", 8 | "connectionId": "AzureWebJobsStorage" 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /check-storage-usage/Properties/serviceDependencies.local.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "appInsights1": { 4 | "type": "appInsights.sdk" 5 | }, 6 | "storage1": { 7 | "type": "storage.emulator", 8 | "connectionId": "AzureWebJobsStorage" 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /check-storage-usage/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | page_type: sample 3 | languages: 4 | - csharp 5 | name: Check storage usage of Azure AI Search 6 | description: "Demonstrates checking storage usage of an Azure AI Search service. This example builds a C# Function App using the Azure AI Search .NET SDK." 
7 | products: 8 | - azure 9 | - azure-cognitive-search 10 | - azure-functions 11 | urlFragment: check-storage-usage 12 | --- 13 | 14 | # Check Azure AI Search service storage usage 15 | 16 | ![MIT license badge](https://img.shields.io/badge/license-MIT-green.svg) 17 | 18 | Demonstrates checking storage usage of an Azure AI Search service on a schedule. This sample may be modified to [adjust the service's capacity](https://docs.microsoft.com/azure/search/search-capacity-planning) or send an alert when the storage usage exceeds a predefined threshold. 19 | 20 | This .NET Core application runs as an [Azure Function](https://docs.microsoft.com/azure/azure-functions/functions-overview). The program [is deployed to Azure](https://docs.microsoft.com/azure/azure-functions/functions-create-your-first-function-visual-studio?tabs=in-process) using [Visual Studio](https://visualstudio.microsoft.com/downloads/) and [runs automatically on a predefined schedule](https://docs.microsoft.com/azure/azure-functions/functions-create-scheduled-function). 21 | 22 | ## Prerequisites 23 | 24 | - [Visual Studio](https://visualstudio.microsoft.com/downloads/) 25 | - [Azure AI Search service](https://docs.microsoft.com/azure/search/search-create-service-portal) 26 | - [Azure Functions](https://docs.microsoft.com/azure/azure-functions/functions-overview) 27 | - [Azure Communication Services](https://docs.microsoft.com/azure/communication-services/overview) 28 | 29 | ## Setup 30 | 31 | 1. Configure a [Communication Services](https://docs.microsoft.com/azure/communication-services/quickstarts/create-communication-resource) resource [to send email](https://docs.microsoft.com/azure/communication-services/quickstarts/email/create-email-communication-resource). 32 | 33 | 1. Clone or download this sample repository. 34 | 35 | 1. Extract contents if the download is a zip file. Make sure the files are read-write. 36 | 37 | ## Run the sample 38 | 39 | 1. Run the function locally [using Visual Studio](https://docs.microsoft.com/azure/azure-functions/functions-develop-local). 40 | 41 | 1. Deploy the sample to Azure [using Visual Studio](https://docs.microsoft.com/azure/azure-functions/functions-create-your-first-function-visual-studio?tabs=in-process#publish-the-project-to-azure). 42 | 43 | 1. Navigate to the deployed Function App in the Azure portal. 44 | 45 | 1. [Update the application settings of the Function App](https://docs.microsoft.com/azure/azure-functions/functions-how-to-use-azure-function-app-settings?tabs=portal). In the Azure portal, navigate to the **Configuration** section under **Settings**. Add the following **Application Settings**: 46 | 47 | + `ServiceName` is the name of your search service. 48 | + `ServiceAdminApiKey` is the [Admin API Key to access your search service](https://docs.microsoft.com/azure/search/search-security-api-keys#find-existing-keys). 49 | + `StorageUsedPercentThreshold` is the threshold used for determining if a search service is using too much storage. This should be a decimal number between 0 and 1, which translates to a percentage of used storage. For example, 0.8 is 80% of used storage. 50 | + `CommunicationServicesConnectionString` is a connection string for your [Communication Services resource](https://docs.microsoft.com/azure/communication-services/concepts/authentication#access-key). 51 | + `ToEmailAddress` is the email address that will be notified of low storage in the search service.
52 | + `FromEmailAddress` is the email address that the notification email will be sent from. It must be in the [domain associated with your Communication Services email resource](https://docs.microsoft.com/azure/communication-services/concepts/email/email-domain-and-sender-authentication). 53 | 54 | ## Verify results 55 | 56 | [An email is sent](https://docs.microsoft.com/azure/communication-services/quickstarts/email/send-email) to the provided email address, notifying you that the search service has low storage available. 57 | 58 | ## Next steps 59 | 60 | You can learn more about Azure AI Search on the [official documentation site](https://docs.microsoft.com/azure/search). 61 | -------------------------------------------------------------------------------- /check-storage-usage/check-storage-usage.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | net6.0 4 | v4 5 | check_storage_usage 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | PreserveNewest 16 | 17 | 18 | PreserveNewest 19 | Never 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /check-storage-usage/check-storage-usage.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.3.32804.467 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "check-storage-usage", "check-storage-usage.csproj", "{6FB7258C-3EC6-4EA4-8E75-3D1189D9351C}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {6FB7258C-3EC6-4EA4-8E75-3D1189D9351C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {6FB7258C-3EC6-4EA4-8E75-3D1189D9351C}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {6FB7258C-3EC6-4EA4-8E75-3D1189D9351C}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {6FB7258C-3EC6-4EA4-8E75-3D1189D9351C}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {1204B124-F064-4D4D-9DA5-3BAC2EC09490} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /check-storage-usage/host.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0", 3 | "logging": { 4 | "applicationInsights": { 5 | "samplingSettings": { 6 | "isEnabled": true, 7 | "excludedTypes": "Request" 8 | } 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /check-storage-usage/local.settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "IsEncrypted": false, 3 | "Values": { 4 | "AzureWebJobsStorage": "UseDevelopmentStorage=true", 5 | "FUNCTIONS_WORKER_RUNTIME": "dotnet", 6 | "ServiceName": "", 7 | "ServiceAdminApiKey": "", 8 | "StorageUsedPercentThreshold": "0.8", 9 | "CommunicationServicesConnectionString": "", 10 | "ToEmailAddress": "a@example.org", 11 | "FromEmailAddress": "donotreply@" 12 | } 13 | } --------------------------------------------------------------------------------
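The heart of this sample is a single service-statistics call. As a minimal standalone sketch of that check (assumptions: the Azure.Search.Documents package, and bracketed placeholder values for the service name, admin key, and threshold), the same computation the Function performs can be run from a console app:

```csharp
using System;
using System.Threading.Tasks;
using Azure;
using Azure.Search.Documents.Indexes;
using Azure.Search.Documents.Indexes.Models;

class StorageCheckSketch
{
    static async Task Main()
    {
        // Placeholder values - substitute your search service name and admin API key.
        var client = new SearchIndexClient(
            new Uri("https://[search-service-name].search.windows.net"),
            new AzureKeyCredential("[admin-api-key]"));

        // Service-wide counters report storage usage and quota in bytes.
        SearchServiceStatistics statistics = await client.GetServiceStatisticsAsync();
        float usedPercent = (float)statistics.Counters.StorageSizeCounter.Usage /
                            (float)statistics.Counters.StorageSizeCounter.Quota;

        Console.WriteLine($"Storage used: {usedPercent:P2}");
        if (usedPercent > 0.8f) // placeholder threshold; the Function reads it from StorageUsedPercentThreshold
        {
            Console.WriteLine("Storage usage exceeds the threshold - consider scaling up or deleting unused indexes.");
        }
    }
}
```

The `(float)` casts mirror the Function's own computation: `Usage` and `Quota` are byte counters (`Quota` is nullable), so they are converted explicitly before dividing.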
/data-lake-gen2-acl-indexing/DataLakeGen2ACLIndexing.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net5.0 6 | DataLakeGen2ACLIndexing 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | Always 19 | 20 | 21 | Always 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Threading.Tasks; 6 | using Azure; 7 | using Azure.Identity; 8 | using Azure.Search.Documents.Indexes; 9 | using Azure.Search.Documents.Indexes.Models; 10 | using Azure.Storage.Files.DataLake; 11 | using Azure.Storage.Files.DataLake.Models; 12 | using Microsoft.Extensions.Configuration; 13 | 14 | 15 | namespace DataLakeGen2ACLIndexing 16 | { 17 | class Program 18 | { 19 | // Name of Container / ADLS Gen2 filesystem for sample data 20 | const string DATA_LAKE_FILESYSTEM_NAME = "acldemo"; 21 | // Directory sample data is stored in locally 22 | const string SAMPLE_DATA_DIRECTORY = "SampleData"; 23 | // Search index name for data indexed from ADLS Gen2 24 | const string SEARCH_ACL_INDEX_NAME = "acltestindex"; 25 | // Search data source name for connection to ADLS Gen2 26 | const string SEARCH_ACL_DATASOURCE_NAME = "acltestdatasource"; 27 | // Search indexer name for connection to ADLS Gen2 28 | const string SEARCH_ACL_INDEXER_NAME = "acltestindexer"; 29 | 30 | async static Task Main(string[] args) 31 | { 32 | // Read settings from appsettings.json 33 | IConfigurationRoot configuration = new ConfigurationBuilder() 34 | .AddJsonFile("appsettings.json", optional: true) 35 | .Build(); 36 | var settings = new AppSettings 37 | { 38 | SearchManagedIdentityID = configuration["searchManagedIdentityID"], 39 | SearchAdminKey = configuration["searchAdminKey"], 40 | SearchEndpoint = configuration["searchEndpoint"], 41 | DataLakeResourceID = configuration["dataLakeResourceID"], 42 | DataLakeEndpoint = configuration["dataLakeEndpoint"] 43 | }; 44 | 45 | // Login to Azure using the default credentials on your local machine 46 | var credential = new DefaultAzureCredential(); 47 | var dfsClient = new DataLakeServiceClient(new Uri(settings.DataLakeEndpoint), credential); 48 | 49 | var fileSystemClient = dfsClient.GetFileSystemClient(DATA_LAKE_FILESYSTEM_NAME); 50 | Console.WriteLine("Create {0} if not exists...", DATA_LAKE_FILESYSTEM_NAME); 51 | await fileSystemClient.CreateIfNotExistsAsync(); 52 | 53 | var rootDirectoryClient = fileSystemClient.GetDirectoryClient(String.Empty); 54 | Console.WriteLine("Uploading sample data if not exists..."); 55 | await UploadSampleDataIfNotExistsAsync(SAMPLE_DATA_DIRECTORY, rootDirectoryClient); 56 | 57 | Console.WriteLine("Applying ACLs to sample data..."); 58 | await ApplyACLsToSampleData(rootDirectoryClient, settings); 59 | 60 | Console.WriteLine("Creating search index, data source, and indexer..."); 61 | await CreateSearchResources(settings); 62 | 63 | Console.WriteLine("Polling for search indexer completion..."); 64 | await PollSearchIndexer(settings); 65 | } 66 | 67 | static async Task UploadSampleDataIfNotExistsAsync(string localDirectory, DataLakeDirectoryClient directoryClient) 68 | { 69 | // Upload all sample data files in this directory 70 | foreach (string filePath in Directory.GetFiles(localDirectory)) 71 | { 72 | string fileName = 
Path.GetFileName(filePath); 73 | DataLakeFileClient fileClient = directoryClient.GetFileClient(fileName); 74 | if (!await fileClient.ExistsAsync()) 75 | { 76 | await fileClient.UploadAsync(filePath); 77 | } 78 | } 79 | 80 | // Recursively create subdirectories, and upload all sample data files in those subdirectories 81 | foreach (string directory in Directory.GetDirectories(localDirectory)) 82 | { 83 | string directoryName = Path.GetFileNameWithoutExtension(directory); 84 | DataLakeDirectoryClient subDirectoryClient = directoryClient.GetSubDirectoryClient(directoryName); 85 | await subDirectoryClient.CreateIfNotExistsAsync(); 86 | await UploadSampleDataIfNotExistsAsync(directory, subDirectoryClient); 87 | } 88 | } 89 | 90 | static async Task ApplyACLsToSampleData(DataLakeDirectoryClient rootDirectoryClient, AppSettings settings) 91 | { 92 | Console.WriteLine("Applying Execute and Read ACLs to root directory..."); 93 | await ApplyACLsForDirectory(rootDirectoryClient, RolePermissions.Execute | RolePermissions.Read, settings); 94 | 95 | Console.WriteLine(@"Applying Execute and Read ACLs to root ""Files for Organization.txt""..."); 96 | var filesForOrganizationClient = rootDirectoryClient.GetFileClient("Files for Organization.txt"); 97 | await ApplyACLsForFile(filesForOrganizationClient, RolePermissions.Execute | RolePermissions.Read, settings); 98 | 99 | Console.WriteLine("Applying Execute and Read ACLs to Shared Documents directory recursively..."); 100 | var sharedDocumentsDirectoryClient = rootDirectoryClient.GetSubDirectoryClient("Shared Documents"); 101 | await ApplyACLsForDirectory(sharedDocumentsDirectoryClient, RolePermissions.Execute | RolePermissions.Read, settings, recursive: true); 102 | 103 | Console.WriteLine("Applying Execute and Read ACLs to User Documents directory..."); 104 | var userDocumentsDirectoryClient = rootDirectoryClient.GetSubDirectoryClient("User Documents"); 105 | await ApplyACLsForDirectory(userDocumentsDirectoryClient, RolePermissions.Execute | RolePermissions.Read, settings); 106 | 107 | Console.WriteLine("Applying Execute and Read ACLs to Alice's document directory..."); 108 | var aliceDirectoryClient = userDocumentsDirectoryClient.GetSubDirectoryClient("Alice"); 109 | await ApplyACLsForDirectory(aliceDirectoryClient, RolePermissions.Execute | RolePermissions.Read, settings); 110 | 111 | Console.WriteLine(@"Applying Execute and Read ACLs to ""alice.txt""..."); 112 | var aliceTxtFile = aliceDirectoryClient.GetFileClient("alice.txt"); 113 | await ApplyACLsForFile(aliceTxtFile, RolePermissions.Execute | RolePermissions.Read, settings); 114 | 115 | Console.WriteLine("Applying Execute and Read ACLs to John's document directory recursively..."); 116 | var johnDirectoryClient = userDocumentsDirectoryClient.GetSubDirectoryClient("John"); 117 | await ApplyACLsForDirectory(johnDirectoryClient, RolePermissions.Execute | RolePermissions.Read, settings, recursive: true); 118 | 119 | Console.WriteLine("Applying Execute and Read ACLs to Bob's document directory recursively..."); 120 | var bobDirectoryClient = userDocumentsDirectoryClient.GetSubDirectoryClient("Bob"); 121 | await ApplyACLsForDirectory(bobDirectoryClient, RolePermissions.Execute | RolePermissions.Read, settings, recursive: true); 122 | 123 | Console.WriteLine(@"Removing Execute and Read ACLs from ""c.txt"""); 124 | var cClient = bobDirectoryClient.GetSubDirectoryClient("Reports").GetFileClient("c.txt"); 125 | await RemoveACLsForFile(cClient, settings); 126 | 127 | Console.WriteLine(@"Removing Execute and Read
ACLs from Bob's Sales directory recursively..."); 128 | var salesClient = bobDirectoryClient.GetSubDirectoryClient("Sales"); 129 | await RemoveACLsForDirectory(salesClient, settings, recursive: true); 130 | } 131 | 132 | // If recursive is false, apply ACLs to a directory. None of the sub-directory or sub-path ACLs are updated 133 | // If recursive is true, apply ACLs to the directory and all sub-directories and sub-paths 134 | // When applying ACLs recursively, the ACLs on all sub-directories and sub-paths are replaced with this directory's ACL 135 | static async Task ApplyACLsForDirectory(DataLakeDirectoryClient directoryClient, RolePermissions newACLs, AppSettings settings, bool recursive = false) 136 | { 137 | PathAccessControl directoryAccessControl = 138 | await directoryClient.GetAccessControlAsync(); 139 | 140 | List<PathAccessControlItem> accessControlList = UpdateACLs(directoryAccessControl.AccessControlList, newACLs, settings); 141 | 142 | if (recursive) 143 | { 144 | await directoryClient.SetAccessControlRecursiveAsync(accessControlList); 145 | } 146 | else 147 | { 148 | await directoryClient.SetAccessControlListAsync(accessControlList); 149 | } 150 | } 151 | 152 | // If recursive is false, remove the ACL from a directory. None of the sub-directory or sub-path ACLs are updated 153 | // If recursive is true, remove ACLs from the directory and all sub-directories and sub-paths 154 | // When removing ACLs recursively, the ACLs on all sub-directories and sub-paths are replaced with this directory's ACL 155 | static async Task RemoveACLsForDirectory(DataLakeDirectoryClient directoryClient, AppSettings settings, bool recursive = false) 156 | { 157 | PathAccessControl directoryAccessControl = 158 | await directoryClient.GetAccessControlAsync(); 159 | 160 | List<PathAccessControlItem> accessControlList = RemoveACLs(directoryAccessControl.AccessControlList, settings); 161 | 162 | if (recursive) 163 | { 164 | await directoryClient.SetAccessControlRecursiveAsync(accessControlList); 165 | } 166 | else 167 | { 168 | await directoryClient.SetAccessControlListAsync(accessControlList); 169 | } 170 | } 171 | 172 | static async Task ApplyACLsForFile(DataLakeFileClient fileClient, RolePermissions newACLs, AppSettings settings) 173 | { 174 | PathAccessControl fileAccessControl = 175 | await fileClient.GetAccessControlAsync(); 176 | 177 | List<PathAccessControlItem> accessControlList = UpdateACLs(fileAccessControl.AccessControlList, newACLs, settings); 178 | 179 | await fileClient.SetAccessControlListAsync(accessControlList); 180 | } 181 | 182 | static async Task RemoveACLsForFile(DataLakeFileClient fileClient, AppSettings settings) 183 | { 184 | PathAccessControl fileAccessControl = 185 | await fileClient.GetAccessControlAsync(); 186 | 187 | List<PathAccessControlItem> accessControlList = RemoveACLs(fileAccessControl.AccessControlList, settings); 188 | 189 | await fileClient.SetAccessControlListAsync(accessControlList); 190 | } 191 | 192 | static List<PathAccessControlItem> UpdateACLs(IEnumerable<PathAccessControlItem> existingACLs, RolePermissions newPermissionsForManagedIdentity, AppSettings settings) 193 | { 194 | // Either add an ACL for the search identity if it doesn't exist, 195 | // or update it if it exists 196 | // To learn more please visit https://docs.microsoft.com/azure/storage/blobs/data-lake-storage-acl-dotnet#update-acls 197 | List<PathAccessControlItem> accessControlList = existingACLs.ToList(); 198 | PathAccessControlItem managedIdentityAcl = accessControlList.FirstOrDefault( 199 | accessControlItem => accessControlItem.AccessControlType == AccessControlType.User && accessControlItem.EntityId == settings.SearchManagedIdentityID);
if (managedIdentityAcl == null) 201 | { 202 | managedIdentityAcl = new PathAccessControlItem( 203 | accessControlType: AccessControlType.User, 204 | permissions: newPermissionsForManagedIdentity, 205 | entityId: settings.SearchManagedIdentityID); 206 | accessControlList.Add(managedIdentityAcl); 207 | } 208 | else 209 | { 210 | managedIdentityAcl.Permissions = newPermissionsForManagedIdentity; 211 | } 212 | 213 | return accessControlList; 214 | } 215 | 216 | static List<PathAccessControlItem> RemoveACLs(IEnumerable<PathAccessControlItem> existingACLs, AppSettings settings) 217 | { 218 | // Remove the ACL for the search identity if it exists 219 | // To learn more please visit https://docs.microsoft.com/azure/storage/blobs/data-lake-storage-acl-dotnet#remove-acl-entries 220 | List<PathAccessControlItem> accessControlList = existingACLs.ToList(); 221 | accessControlList.RemoveAll( 222 | accessControlItem => accessControlItem.AccessControlType == AccessControlType.User && accessControlItem.EntityId == settings.SearchManagedIdentityID); 223 | 224 | return accessControlList; 225 | } 226 | 227 | static async Task CreateSearchResources(AppSettings settings) 228 | { 229 | SearchIndexClient indexClient = new SearchIndexClient(settings.SearchEndpointUri, settings.SearchKeyCredential); 230 | 231 | Console.WriteLine("Deleting search index {0} if it exists...", SEARCH_ACL_INDEX_NAME); 232 | try 233 | { 234 | await indexClient.GetIndexAsync(SEARCH_ACL_INDEX_NAME); 235 | await indexClient.DeleteIndexAsync(SEARCH_ACL_INDEX_NAME); 236 | } 237 | catch (RequestFailedException) 238 | { 239 | // Index didn't exist - continue 240 | } 241 | 242 | Console.WriteLine("Creating search index {0}...", SEARCH_ACL_INDEX_NAME); 243 | await indexClient.CreateOrUpdateIndexAsync( 244 | new SearchIndex(SEARCH_ACL_INDEX_NAME, fields: new[] 245 | { 246 | new SearchField("key", SearchFieldDataType.String) { IsKey = true }, 247 | new SearchField("metadata_storage_path", SearchFieldDataType.String), 248 | new SearchField("content", SearchFieldDataType.String) 249 | })); 250 | 251 | Console.WriteLine("Creating search data source {0}...", SEARCH_ACL_DATASOURCE_NAME); 252 | SearchIndexerClient indexerClient = new SearchIndexerClient(settings.SearchEndpointUri, settings.SearchKeyCredential); 253 | await indexerClient.CreateOrUpdateDataSourceConnectionAsync( 254 | new SearchIndexerDataSourceConnection( 255 | name: SEARCH_ACL_DATASOURCE_NAME, 256 | type: SearchIndexerDataSourceType.AzureBlob, 257 | connectionString: "ResourceId=" + settings.DataLakeResourceID, 258 | container: new SearchIndexerDataContainer(name: DATA_LAKE_FILESYSTEM_NAME))); 259 | 260 | Console.WriteLine("Deleting search indexer {0} if it exists...", SEARCH_ACL_INDEXER_NAME); 261 | try 262 | { 263 | await indexerClient.GetIndexerAsync(SEARCH_ACL_INDEXER_NAME); 264 | await indexerClient.DeleteIndexerAsync(SEARCH_ACL_INDEXER_NAME); 265 | } 266 | catch (RequestFailedException) 267 | { 268 | // Indexer didn't exist - continue 269 | } 270 | 271 | Console.WriteLine("Creating search indexer {0}...", SEARCH_ACL_INDEXER_NAME); 272 | await indexerClient.CreateIndexerAsync( 273 | new SearchIndexer( 274 | name: SEARCH_ACL_INDEXER_NAME, 275 | dataSourceName: SEARCH_ACL_DATASOURCE_NAME, 276 | targetIndexName: SEARCH_ACL_INDEX_NAME) 277 | { 278 | Parameters = new IndexingParameters 279 | { 280 | MaxFailedItems = -1, 281 | IndexingParametersConfiguration = new IndexingParametersConfiguration 282 | { 283 | ParsingMode = BlobIndexerParsingMode.Text 284 | } 285 | } 286 | }); 287 | } 288 | 289 | static async Task
PollSearchIndexer(AppSettings settings) 290 | { 291 | await Task.Delay(TimeSpan.FromSeconds(5)); 292 | 293 | SearchIndexerClient indexerClient = new SearchIndexerClient(settings.SearchEndpointUri, settings.SearchKeyCredential); 294 | while (true) 295 | { 296 | SearchIndexerStatus status = await indexerClient.GetIndexerStatusAsync(SEARCH_ACL_INDEXER_NAME); 297 | if (status.LastResult != null && 298 | status.LastResult.Status != IndexerExecutionStatus.InProgress) 299 | { 300 | Console.WriteLine("Completed indexing sample data"); 301 | break; 302 | } 303 | 304 | Console.WriteLine("Indexing has not finished. Waiting 5 seconds and polling again..."); 305 | await Task.Delay(TimeSpan.FromSeconds(5)); 306 | } 307 | } 308 | 309 | class AppSettings 310 | { 311 | public string SearchManagedIdentityID { get; set; } 312 | public string SearchAdminKey { get; set; } 313 | public string SearchEndpoint { get; set; } 314 | public string DataLakeEndpoint { get; set; } 315 | public string DataLakeResourceID { get; set; } 316 | 317 | public Uri SearchEndpointUri => new Uri(SearchEndpoint); 318 | public AzureKeyCredential SearchKeyCredential => new AzureKeyCredential(SearchAdminKey); 319 | } 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | page_type: sample 3 | languages: 4 | - csharp 5 | name: Index Azure Data Lake Gen2 using a managed identity 6 | description: "Index a subset of your Azure Data Lake Gen2 data by using access control lists to allow certain files and directories to be accessed by an indexer in Azure AI Search." 7 | products: 8 | - azure 9 | - azure-cognitive-search 10 | urlFragment: data-lake-gen2-acl-indexing 11 | --- 12 | 13 | # Index Data Lake Gen2 using Azure AD 14 | 15 | This Azure AI Search sample shows you how to configure an indexer connection to Azure Data Lake Gen2 that uses a managed identity and role assignments for selective data access. The sample loads data and sets up permissions for data access, and then runs the indexer to create and load a search index. 16 | 17 | Normally, when setting up [managed identity with Azure Blob Storage or Data Lake Storage](https://docs.microsoft.com/azure/search/search-howto-managed-identities-storage#2---add-a-role-assignment), the [Storage Blob Data Reader role](https://docs.microsoft.com/azure/role-based-access-control/built-in-roles#storage-blob-data-reader) is used. However, this role grants full access to all files in the storage account, which may be undesirable if you are using [Access Control Lists](https://docs.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) for more selective access. This sample shows you how to constrain data access to specific files and users.
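To make the ACL mechanics concrete before the setup steps, here is a minimal sketch of granting the search service's identity the permissions an indexer needs on one directory (assumptions: the Azure.Storage.Files.DataLake and Azure.Identity packages, and bracketed placeholder values for the storage endpoint and the identity's object ID; the file system and directory names match this sample's data):

```csharp
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Azure.Identity;
using Azure.Storage.Files.DataLake;
using Azure.Storage.Files.DataLake.Models;

class GrantSearchAclSketch
{
    static async Task Main()
    {
        // Placeholder value - the object (principal) ID of the search service's managed identity.
        string searchIdentityObjectId = "[search-managed-identity-object-id]";

        var serviceClient = new DataLakeServiceClient(
            new Uri("https://[storageaccountname].dfs.core.windows.net"),
            new DefaultAzureCredential());
        DataLakeDirectoryClient directory = serviceClient
            .GetFileSystemClient("acldemo")
            .GetDirectoryClient("Shared Documents");

        // Read the current ACL, replace any existing entry for the search identity,
        // and write the updated list back.
        PathAccessControl current = await directory.GetAccessControlAsync();
        List<PathAccessControlItem> acl = current.AccessControlList.ToList();
        acl.RemoveAll(item => item.AccessControlType == AccessControlType.User
                           && item.EntityId == searchIdentityObjectId);
        acl.Add(new PathAccessControlItem(
            AccessControlType.User,
            RolePermissions.Execute | RolePermissions.Read,
            entityId: searchIdentityObjectId));
        await directory.SetAccessControlListAsync(acl);
    }
}
```

Execute on a directory lets the identity traverse it, and Read on a file lets the indexer download it; both are needed along the entire path from the container root, which is why the sample program applies ACLs from the root downward.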
18 | 19 | ## Prerequisites 20 | 21 | + [.NET 5](https://dotnet.microsoft.com/download/dotnet/5.0) 22 | + [Git](https://git-scm.com/downloads) 23 | + [Azure AI Search service](https://docs.microsoft.com/azure/search/search-create-service-portal) on a billable tier (free tier is not supported) 24 | + [Azure Storage](https://docs.microsoft.com/azure/storage/common/storage-account-create?tabs=azure-portal) with the "Enable hierarchical namespace" option 25 | + Client app: [Visual Studio](https://visualstudio.microsoft.com/downloads/), PowerShell, or [Visual Studio Code](https://code.visualstudio.com/download) with the [Azure Tools](https://docs.microsoft.com/dotnet/azure/configure-vs-code#install-the-azure-tools-extension-pack) extension pack 26 | 27 | ## Clone the search sample with git 28 | 29 | At a terminal, download the sample application to your local computer. 30 | 31 | ```bash 32 | git clone https://github.com/Azure-Samples/azure-search-dotnet-samples 33 | ``` 34 | 35 | ## Set up Azure resources 36 | 37 | 1. [Sign in to the Azure portal](https://portal.azure.com). 38 | 39 | 1. [Create a resource group if one doesn't already exist](https://docs.microsoft.com/azure/azure-resource-manager/management/manage-resource-groups-portal#create-resource-groups). 40 | 41 | 1. [Create an Azure AI Search service if one doesn't already exist](https://docs.microsoft.com/azure/search/search-create-service-portal), at [Basic tier](https://azure.microsoft.com/pricing/details/search/) or above. 42 | 43 | 1. Enable a managed identity for your search service using either of the following approaches: 44 | 45 | + [System-managed identity](https://docs.microsoft.com/azure/search/search-howto-managed-identities-storage#option-1---turn-on-system-assigned-managed-identity) 46 | 47 | + [User-managed identity](https://docs.microsoft.com/azure/search/search-howto-managed-identities-storage#option-2---assign-a-user-assigned-managed-identity-to-the-search-service-preview) 48 | 49 | 1. [Create an Azure Storage account if one doesn't already exist](https://docs.microsoft.com/azure/storage/common/storage-account-create?tabs=azure-portal). Make sure that **Enable hierarchical namespace** is checked to enable Data Lake Storage Gen 2 on the storage account. 50 | 51 | ## Grant permissions in Azure Storage 52 | 53 | Search must be able to connect to Azure Storage, and the user who runs the app must be able to load and then secure that data. In this step, create role assignments in Azure Storage to support both tasks. 54 | 55 | 1. In your storage account page in the portal, [create a role assignment](https://docs.microsoft.com/azure/role-based-access-control/role-assignments-portal?tabs=current) that allows the search service's managed identity access to the storage account: 56 | 57 | + Choose [**Reader**](https://docs.microsoft.com/azure/role-based-access-control/built-in-roles#reader) (do not use **Storage Blob Data Reader**) 58 | 59 | 1. Repeat the previous step, this time [creating a role assignment](https://docs.microsoft.com/azure/role-based-access-control/role-assignments-portal?tabs=current) for the user running the sample application.
The role must be able to upload sample data and create role assignments in Data Lake Gen2 storage: 60 | 61 | + Choose [**Storage Blob Data Contributor**](https://docs.microsoft.com/azure/role-based-access-control/built-in-roles#storage-blob-data-contributor) or [**Storage Blob Data Owner**](https://docs.microsoft.com/azure/role-based-access-control/built-in-roles#storage-blob-data-owner) 62 | 63 | ## Edit appsettings.json 64 | 65 | Open the **appsettings.json** file in your local copy of the sample application and change the following values. 66 | 67 | 1. "searchManagedIdentityId": "Object (principal) ID for User-assigned or System Managed Identity for Search Service": 68 | 69 | + For a system-assigned managed identity, go to the search service's dashboard in the portal. In the left navigation pane, select Identity and then [copy the ID for the system managed identity](https://docs.microsoft.com/azure/search/search-howto-managed-identities-storage#option-1---turn-on-system-assigned-managed-identity). 70 | 71 | + For a user-assigned managed identity, [list the user-managed identities for your subscription](https://docs.microsoft.com/azure/active-directory/managed-identities-azure-resources/how-manage-user-assigned-managed-identities?pivots=identity-mi-methods-azp#list-user-assigned-managed-identities) and then copy the object ID. 72 | 73 | 1. "searchAdminKey": "Admin key for Search Service": 74 | 75 | + Find the Admin API key in the [Keys tab](https://docs.microsoft.com/azure/search/search-security-api-keys#find-existing-keys) on the search service's portal page. 76 | 77 | 1. "searchEndpoint": `https://[search-service-name].search.windows.net`: 78 | 79 | + Find the URI in the [search service's Overview portal page](https://docs.microsoft.com/azure/search/search-manage#overview-home-page). 80 | 81 | 1. "dataLakeResourceID": `/subscriptions/[subscription-id]/resourceGroups/[resource-group-name]/providers/Microsoft.Storage/storageAccounts/[storageaccountname]`: 82 | 83 | + Find the resource ID in the storage account's service dashboard in the portal. Go to Settings > Endpoint > Data Lake Storage, and then copy the resource ID. 84 | 85 | 1. "dataLakeEndpoint": `https://[storageaccountname].dfs.core.windows.net`: 86 | 87 | + Find the endpoint in the storage account's service dashboard in the portal. Go to Settings > Endpoint > Data Lake Storage, and then copy the primary endpoint. 88 | 89 | ## Run sample code and verify sample data 90 | 91 | Use a client application that can connect to Azure and build a .NET project. 92 | 93 | 1. Using Visual Studio Code with the Azure Tools Extension: 94 | 95 | 1. On the side bar, select the Azure Tools extension and then sign in to your Azure account. 96 | 97 | 1. On the side bar, open Explorer, and then open the local folder containing the sample code. 98 | 99 | 1. Right-click the folder name and open an integrated terminal. 100 | 101 | 1. Run the following command to execute the sample code: `dotnet run` 102 | 103 | 1. Using PowerShell on a computer that has .NET: 104 | 105 | 1. With Administrator permissions in PowerShell, load the Az module: `Import-Module -Name Az` 106 | 107 | 1. Connect to Azure: `Connect-AzAccount` 108 | 109 | 1. Run the following command to execute the sample code: `dotnet run` 110 | 111 | 1. When the sample data has finished indexing, the sample will exit with the message "Completed indexing sample data". 112 | 113 | 1. Return to the Azure portal and your search service. Use [Search Explorer](https://docs.microsoft.com/azure/search/search-explorer) to view the "acltestindex" index and see the indexed sample data.
Only data with an [Access Control List](https://docs.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) allowing the indexer's identity will appear in the index. 114 | 115 | ## Clean up resources 116 | 117 | To clean up resources created in this tutorial, [delete the resource group](https://docs.microsoft.com/azure/azure-resource-manager/management/delete-resource-group) that contains the resources. 118 | 119 | ## Next Steps 120 | 121 | Learn more about how Azure Data Lake Storage Gen2 works with access control lists: 122 | 123 | + [Access control lists (ACLs) in Azure Data Lake Storage Gen2](https://docs.microsoft.com/azure/storage/blobs/data-lake-storage-access-control) 124 | 125 | + [Permissions table: Combining Azure RBAC and ACL](https://docs.microsoft.com/azure/storage/blobs/data-lake-storage-access-control-model#permissions-table-combining-azure-rbac-and-acl) 126 | 127 | + [Use .NET to manage ACLs in Azure Data Lake Storage Gen2](https://docs.microsoft.com/azure/storage/blobs/data-lake-storage-acl-dotnet) -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/Files for Organization.txt: -------------------------------------------------------------------------------- 1 | Files for Organization in this directory 2 | Shared Documents - Accessible to all 3 | Private Documents - Confidential documents, not accessible 4 | User Documents - Mix of accessible to all and confidential documents 5 | -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/Private/confidential.txt: -------------------------------------------------------------------------------- 1 | Confidential data. If this got out, that would be bad.... -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/Shared Documents/public.txt: -------------------------------------------------------------------------------- 1 | Public data. OK to be shared to everyone -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/Alice/alice-secret.txt: -------------------------------------------------------------------------------- 1 | You can't read this -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/Alice/alice.txt: -------------------------------------------------------------------------------- 1 | Alice declined to make most of her documents public. This one is public though. -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/Bob/Reports/a.txt: -------------------------------------------------------------------------------- 1 | Big report on the A organization. You can read this -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/Bob/Reports/b.txt: -------------------------------------------------------------------------------- 1 | Big report on the B organization. You can read this. -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/Bob/Reports/c.txt: -------------------------------------------------------------------------------- 1 | Secret report on the C organization. You can't read this. 
-------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/Bob/Sales/d.txt: -------------------------------------------------------------------------------- 1 | Some sales Bob made to the D organization. This is private so you can't read this -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/Bob/Sales/e.txt: -------------------------------------------------------------------------------- 1 | Some sales Bob made to the E organization. This is private so you can't read this -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/Bob/bob.txt: -------------------------------------------------------------------------------- 1 | Bob has a few documents he wants made public. Use recursive acls to quickly set this up -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/John/Documents/a.txt: -------------------------------------------------------------------------------- 1 | Data for John on the A organization. This is public -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/John/Documents/b.txt: -------------------------------------------------------------------------------- 1 | Data for John on the B organization. This is public -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/SampleData/User Documents/John/john.txt: -------------------------------------------------------------------------------- 1 | John's documents, they are public -------------------------------------------------------------------------------- /data-lake-gen2-acl-indexing/appsettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "searchManagedIdentityId": "Object (principal) ID for User Assigned or System Managed Identity for Search Service", 3 | "searchAdminKey": "Admin key for Search Service", 4 | "searchEndpoint": "https://[search-service-name].search.windows.net", 5 | "dataLakeResourceID": "/subscriptions/[subscription-id]/resourceGroups/[resource-group-name]/providers/Microsoft.Storage/storageAccounts/[storageaccountname]", 6 | "dataLakeEndpoint": "https://[storageaccountname].dfs.core.windows.net" 7 | } -------------------------------------------------------------------------------- /export-data/.gitignore: -------------------------------------------------------------------------------- 1 | tests/config.json 2 | Sample/local.settings.json 3 | 4 | ## Ignore Visual Studio temporary files, build results, and 5 | ## files generated by popular Visual Studio add-ons. 
6 | ## 7 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 8 | 9 | # User-specific files 10 | *.rsuser 11 | *.suo 12 | *.user 13 | *.userosscache 14 | *.sln.docstates 15 | 16 | # User-specific files (MonoDevelop/Xamarin Studio) 17 | *.userprefs 18 | 19 | # Mono auto generated files 20 | mono_crash.* 21 | 22 | # Build results 23 | [Dd]ebug/ 24 | [Dd]ebugPublic/ 25 | [Rr]elease/ 26 | [Rr]eleases/ 27 | x64/ 28 | x86/ 29 | [Ww][Ii][Nn]32/ 30 | [Aa][Rr][Mm]/ 31 | [Aa][Rr][Mm]64/ 32 | bld/ 33 | [Bb]in/ 34 | [Oo]bj/ 35 | [Ll]og/ 36 | [Ll]ogs/ 37 | 38 | # Visual Studio 2015/2017 cache/options directory 39 | .vs/ 40 | # Uncomment if you have tasks that create the project's static files in wwwroot 41 | #wwwroot/ 42 | 43 | # Visual Studio 2017 auto generated files 44 | Generated\ Files/ 45 | 46 | # MSTest test Results 47 | [Tt]est[Rr]esult*/ 48 | [Bb]uild[Ll]og.* 49 | 50 | # NUnit 51 | *.VisualState.xml 52 | TestResult.xml 53 | nunit-*.xml 54 | 55 | # Build Results of an ATL Project 56 | [Dd]ebugPS/ 57 | [Rr]eleasePS/ 58 | dlldata.c 59 | 60 | # Benchmark Results 61 | BenchmarkDotNet.Artifacts/ 62 | 63 | # .NET Core 64 | project.lock.json 65 | project.fragment.lock.json 66 | artifacts/ 67 | 68 | # ASP.NET Scaffolding 69 | ScaffoldingReadMe.txt 70 | 71 | # StyleCop 72 | StyleCopReport.xml 73 | 74 | # Files built by Visual Studio 75 | *_i.c 76 | *_p.c 77 | *_h.h 78 | *.ilk 79 | *.meta 80 | *.obj 81 | *.iobj 82 | *.pch 83 | *.pdb 84 | *.ipdb 85 | *.pgc 86 | *.pgd 87 | *.rsp 88 | *.sbr 89 | *.tlb 90 | *.tli 91 | *.tlh 92 | *.tmp 93 | *.tmp_proj 94 | *_wpftmp.csproj 95 | *.log 96 | *.tlog 97 | *.vspscc 98 | *.vssscc 99 | .builds 100 | *.pidb 101 | *.svclog 102 | *.scc 103 | 104 | # Chutzpah Test files 105 | _Chutzpah* 106 | 107 | # Visual C++ cache files 108 | ipch/ 109 | *.aps 110 | *.ncb 111 | *.opendb 112 | *.opensdf 113 | *.sdf 114 | *.cachefile 115 | *.VC.db 116 | *.VC.VC.opendb 117 | 118 | # Visual Studio profiler 119 | *.psess 120 | *.vsp 121 | *.vspx 122 | *.sap 123 | 124 | # Visual Studio Trace Files 125 | *.e2e 126 | 127 | # TFS 2012 Local Workspace 128 | $tf/ 129 | 130 | # Guidance Automation Toolkit 131 | *.gpState 132 | 133 | # ReSharper is a .NET coding add-in 134 | _ReSharper*/ 135 | *.[Rr]e[Ss]harper 136 | *.DotSettings.user 137 | 138 | # TeamCity is a build add-in 139 | _TeamCity* 140 | 141 | # DotCover is a Code Coverage Tool 142 | *.dotCover 143 | 144 | # AxoCover is a Code Coverage Tool 145 | .axoCover/* 146 | !.axoCover/settings.json 147 | 148 | # Coverlet is a free, cross platform Code Coverage Tool 149 | coverage*.json 150 | coverage*.xml 151 | coverage*.info 152 | 153 | # Visual Studio code coverage results 154 | *.coverage 155 | *.coveragexml 156 | 157 | # NCrunch 158 | _NCrunch_* 159 | .*crunch*.local.xml 160 | nCrunchTemp_* 161 | 162 | # MightyMoose 163 | *.mm.* 164 | AutoTest.Net/ 165 | 166 | # Web workbench (sass) 167 | .sass-cache/ 168 | 169 | # Installshield output folder 170 | [Ee]xpress/ 171 | 172 | # DocProject is a documentation generator add-in 173 | DocProject/buildhelp/ 174 | DocProject/Help/*.HxT 175 | DocProject/Help/*.HxC 176 | DocProject/Help/*.hhc 177 | DocProject/Help/*.hhk 178 | DocProject/Help/*.hhp 179 | DocProject/Help/Html2 180 | DocProject/Help/html 181 | 182 | # Click-Once directory 183 | publish/ 184 | 185 | # Publish Web Output 186 | *.[Pp]ublish.xml 187 | *.azurePubxml 188 | # Note: Comment the next line if you want to checkin your web deploy settings, 189 | # but database connection strings (with potential 
passwords) will be unencrypted 190 | *.pubxml 191 | *.publishproj 192 | 193 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 194 | # checkin your Azure Web App publish settings, but sensitive information contained 195 | # in these scripts will be unencrypted 196 | PublishScripts/ 197 | 198 | # NuGet Packages 199 | *.nupkg 200 | # NuGet Symbol Packages 201 | *.snupkg 202 | # The packages folder can be ignored because of Package Restore 203 | **/[Pp]ackages/* 204 | # except build/, which is used as an MSBuild target. 205 | !**/[Pp]ackages/build/ 206 | # Uncomment if necessary however generally it will be regenerated when needed 207 | #!**/[Pp]ackages/repositories.config 208 | # NuGet v3's project.json files produces more ignorable files 209 | *.nuget.props 210 | *.nuget.targets 211 | 212 | # Microsoft Azure Build Output 213 | csx/ 214 | *.build.csdef 215 | 216 | # Microsoft Azure Emulator 217 | ecf/ 218 | rcf/ 219 | 220 | # Windows Store app package directories and files 221 | AppPackages/ 222 | BundleArtifacts/ 223 | Package.StoreAssociation.xml 224 | _pkginfo.txt 225 | *.appx 226 | *.appxbundle 227 | *.appxupload 228 | 229 | # Visual Studio cache files 230 | # files ending in .cache can be ignored 231 | *.[Cc]ache 232 | # but keep track of directories ending in .cache 233 | !?*.[Cc]ache/ 234 | 235 | # Others 236 | ClientBin/ 237 | ~$* 238 | *~ 239 | *.dbmdl 240 | *.dbproj.schemaview 241 | *.jfm 242 | *.pfx 243 | *.publishsettings 244 | orleans.codegen.cs 245 | 246 | # Including strong name files can present a security risk 247 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 248 | #*.snk 249 | 250 | # Since there are multiple workflows, uncomment next line to ignore bower_components 251 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 252 | #bower_components/ 253 | 254 | # RIA/Silverlight projects 255 | Generated_Code/ 256 | 257 | # Backup & report files from converting an old project file 258 | # to a newer Visual Studio version. Backup files are not needed, 259 | # because we have git ;-) 260 | _UpgradeReport_Files/ 261 | Backup*/ 262 | UpgradeLog*.XML 263 | UpgradeLog*.htm 264 | ServiceFabricBackup/ 265 | *.rptproj.bak 266 | 267 | # SQL Server files 268 | *.mdf 269 | *.ldf 270 | *.ndf 271 | 272 | # Business Intelligence projects 273 | *.rdl.data 274 | *.bim.layout 275 | *.bim_*.settings 276 | *.rptproj.rsuser 277 | *- [Bb]ackup.rdl 278 | *- [Bb]ackup ([0-9]).rdl 279 | *- [Bb]ackup ([0-9][0-9]).rdl 280 | 281 | # Microsoft Fakes 282 | FakesAssemblies/ 283 | 284 | # GhostDoc plugin setting file 285 | *.GhostDoc.xml 286 | 287 | # Node.js Tools for Visual Studio 288 | .ntvs_analysis.dat 289 | node_modules/ 290 | 291 | # Visual Studio 6 build log 292 | *.plg 293 | 294 | # Visual Studio 6 workspace options file 295 | *.opt 296 | 297 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 298 | *.vbw 299 | 300 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
301 | *.vbp 302 | 303 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 304 | *.dsw 305 | *.dsp 306 | 307 | # Visual Studio 6 technical files 308 | *.ncb 309 | *.aps 310 | 311 | # Visual Studio LightSwitch build output 312 | **/*.HTMLClient/GeneratedArtifacts 313 | **/*.DesktopClient/GeneratedArtifacts 314 | **/*.DesktopClient/ModelManifest.xml 315 | **/*.Server/GeneratedArtifacts 316 | **/*.Server/ModelManifest.xml 317 | _Pvt_Extensions 318 | 319 | # Paket dependency manager 320 | .paket/paket.exe 321 | paket-files/ 322 | 323 | # FAKE - F# Make 324 | .fake/ 325 | 326 | # CodeRush personal settings 327 | .cr/personal 328 | 329 | # Python Tools for Visual Studio (PTVS) 330 | __pycache__/ 331 | *.pyc 332 | 333 | # Cake - Uncomment if you are using it 334 | # tools/** 335 | # !tools/packages.config 336 | 337 | # Tabs Studio 338 | *.tss 339 | 340 | # Telerik's JustMock configuration file 341 | *.jmconfig 342 | 343 | # BizTalk build output 344 | *.btp.cs 345 | *.btm.cs 346 | *.odx.cs 347 | *.xsd.cs 348 | 349 | # OpenCover UI analysis results 350 | OpenCover/ 351 | 352 | # Azure Stream Analytics local run output 353 | ASALocalRun/ 354 | 355 | # MSBuild Binary and Structured Log 356 | *.binlog 357 | 358 | # NVidia Nsight GPU debugger configuration file 359 | *.nvuser 360 | 361 | # MFractors (Xamarin productivity tool) working folder 362 | .mfractor/ 363 | 364 | # Local History for Visual Studio 365 | .localhistory/ 366 | 367 | # Visual Studio History (VSHistory) files 368 | .vshistory/ 369 | 370 | # BeatPulse healthcheck temp database 371 | healthchecksdb 372 | 373 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 374 | MigrationBackup/ 375 | 376 | # Ionide (cross platform F# VS Code tools) working folder 377 | .ionide/ 378 | 379 | # Fody - auto-generated XML schema 380 | FodyWeavers.xsd 381 | 382 | # VS Code files for those working on multiple tools 383 | .vscode/* 384 | !.vscode/settings.json 385 | !.vscode/tasks.json 386 | !.vscode/launch.json 387 | !.vscode/extensions.json 388 | *.code-workspace 389 | 390 | # Local History for Visual Studio Code 391 | .history/ 392 | 393 | # Windows Installer files from build outputs 394 | *.cab 395 | *.msi 396 | *.msix 397 | *.msm 398 | *.msp 399 | 400 | # JetBrains Rider 401 | *.sln.iml -------------------------------------------------------------------------------- /export-data/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | page_type: sample 3 | languages: 4 | - csharp 5 | name: Export data from an Azure AI Search index 6 | description: "Export data from an Azure AI Search service. This example builds a C# Console Application using the Azure AI Search .NET SDK." 7 | products: 8 | - azure 9 | - azure-cognitive-search 10 | urlFragment: export-data 11 | --- 12 | 13 | # Export Azure AI Search service index data 14 | 15 | ![MIT license badge](https://img.shields.io/badge/license-MIT-green.svg) 16 | 17 | Export data from an Azure AI Search service. This .NET application runs on the command line. 18 | 19 | ## Prerequisites 20 | 21 | - [Visual Studio](https://visualstudio.microsoft.com/downloads/) 22 | - [Azure AI Search service](https://docs.microsoft.com/azure/search/search-create-service-portal) 23 | 24 | ## Setup 25 | 26 | 1. Clone or download this sample repository. 27 | 28 | 1. Extract contents if the download is a zip file. Make sure the files are read-write. 29 | 30 | ## Run the sample 31 | 32 | 1. 
Run the app locally [using Visual Studio](https://docs.microsoft.com/azure/azure-functions/functions-develop-local) or [dotnet run](https://learn.microsoft.com/dotnet/core/tools/dotnet-run). 33 | 34 | 1. The app has four commands: 35 | 1. `get-bounds` 36 | 1. `partition-index` 37 | 1. `export-partitions` 38 | 1. `export-continuous` 39 | 40 | These commands support two different strategies for exporting data from the index: 41 | 42 | 1. Partitioned export. Documents in the index are split into smaller partitions that can be concurrently exported into JSON files. 43 | 1. Continuous export. An additional field is added to your index to track export progress, and is continually updated as more documents are exported. 44 | 45 | These strategies have different tradeoffs. You should use partitioned export when: 46 | 47 | - You have a [sortable](https://learn.microsoft.com/azure/search/search-pagination-page-layout#ordering-with-orderby) and [filterable](https://learn.microsoft.com/azure/search/search-filters) field that can be used to partition the documents in the index. 48 | - You are not updating the documents in the index, or at least not the documents you want to export. 49 | - You have a large number of documents. Partitioned export supports exporting more than 1000 documents concurrently. Export speed depends on [how your search service is provisioned](https://learn.microsoft.com/azure/search/search-capacity-planning). 50 | 51 | You should use continuous export when: 52 | 53 | - You do not have a [sortable](https://learn.microsoft.com/azure/search/search-pagination-page-layout#ordering-with-orderby) and [filterable](https://learn.microsoft.com/azure/search/search-filters) field. This field is required for partitioned export; the sketch after these lists shows a quick way to check for one. 54 | - You are actively updating the documents in the index you want to export. 55 | - You have [storage space remaining on your search service](https://learn.microsoft.com/azure/search/search-limits-quotas-capacity#storage-limits), and are OK with the export process updating documents in the index. Continuous export adds an additional field to track export progress, which requires some storage to be available. 56 | - You can tolerate duplicate documents in the exported data. If the search service has multiple replicas, a [best-effort attempt is made to use the same replica](https://learn.microsoft.com/azure/search/index-similarity-and-scoring#scoring-statistics-and-sticky-sessions) to ensure consistent export results. There may also [be a delay in updating already exported documents](https://learn.microsoft.com/rest/api/searchservice/addupdate-or-delete-documents#response), so documents may be exported more than once. 57 | 
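Not sure whether your index qualifies for partitioned export? As a quick check, here is a minimal sketch using the Azure.Search.Documents SDK that lists candidate partition fields. It assumes you already have a `SearchIndexClient` named `searchIndexClient`, and the index name is illustrative:

```csharp
// Sketch: list the fields of "my-index" that could drive partitioned export.
// A candidate partition field must be both sortable and filterable.
SearchIndex index = await searchIndexClient.GetIndexAsync("my-index");
foreach (SearchField field in index.Fields)
{
    if ((field.IsSortable ?? false) && (field.IsFilterable ?? false))
    {
        Console.WriteLine($"Candidate partition field: {field.Name} ({field.Type})");
    }
}
```

If no field qualifies, use continuous export instead.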
### Partitioned export commands 59 | 60 | #### get-bounds 61 | 62 | The `get-bounds` command is used to find the smallest and largest values of a sortable and filterable field in the index. This is used to determine how to split up the documents in the index into smaller partitions. 63 | 64 | ``` 65 | Description: 66 | Find and display the largest and lowest value for the specified field. Used to determine how to partition index data for export 67 | 68 | Usage: 69 | export-data get-bounds [options] 70 | 71 | Options: 72 | --endpoint (REQUIRED) Endpoint of the search service to export data from. Example: https://example.search.windows.net 73 | --admin-key Admin key to the search service to export data from. If not specified - uses your Entra identity 74 | --index-name (REQUIRED) Name of the index to export data from 75 | --field-name (REQUIRED) Name of field used to partition the index data. This field must be filterable and sortable. 76 | -?, -h, --help Show help and usage information 77 | ``` 78 | 79 | Sample usage: 80 | 81 | ``` 82 | dotnet run get-bounds --endpoint https://example.search.windows.net --admin-key AAAAAAA --index-name my-index --field-name date 83 | 84 | Lower Bound 1969-12-31T16:11:38.0000000+00:00 85 | Upper Bound 2022-11-06T12:14:21.0000000+00:00 86 | ``` 87 | 88 | In this example, `date` is an [Edm.DateTimeOffset](https://learn.microsoft.com/rest/api/searchservice/supported-data-types) with the [sortable](https://learn.microsoft.com/azure/search/search-pagination-page-layout#ordering-with-orderby) and [filterable](https://learn.microsoft.com/azure/search/search-filters) attributes applied. The smallest value in the index for this field is 1969-12-31 and the largest value is 2022-11-06, matching the bounds shown above. 89 | 90 | #### partition-index 91 | 92 | The `partition-index` command is used to divide the index into smaller partitions. 93 | 94 | ``` 95 | Description: 96 | Partitions the data in the index between the upper and lower bound values into partitions with at most 100,000 documents. 97 | 98 | Usage: 99 | export-data partition-index [options] 100 | 101 | Options: 102 | --endpoint (REQUIRED) Endpoint of the search service to export data from. Example: https://example.search.windows.net 103 | --admin-key Admin key to the search service to export data from. If not specified - uses your Entra identity 104 | --index-name (REQUIRED) Name of the index to export data from 105 | --field-name (REQUIRED) Name of field used to partition the index data. This field must be filterable and sortable. 106 | --lower-bound Smallest value to use to partition the index data. Defaults to the smallest value in the index. [] 107 | --upper-bound Largest value to use to partition the index data. Defaults to the largest value in the index. [] 108 | --partition-size Maximum size of a partition. Defaults to 100,000. Cannot exceed 100,000 [default: 100000] 109 | --partition-path Path of the file with JSON description of partitions. Should end in .json. Default is -partitions.json [] 110 | -?, -h, --help Show help and usage information 111 | ``` 112 | 113 | Sample usage: 114 | 115 | ``` 116 | dotnet run partition-index --endpoint https://example.search.windows.net --admin-key AAAAAAA --index-name my-index --field-name date 117 | 118 | Wrote partitions to my-index-partitions.json 119 | ``` 120 | 121 | In this case, `my-index-partitions.json` has a JSON description of the partitions inside the index: 122 | 123 | ```json 124 | { 125 | "endpoint": "https://example.search.windows.net", 126 | "indexName": "my-index", 127 | "fieldName": "date", 128 | "totalDocumentCount": 500000, 129 | "partitions": [ 130 | { 131 | "upperBound": "1976-08-09T12:41:58.375+00:00", 132 | "lowerBound": "1969-12-31T16:11:38+00:00", 133 | "documentCount": 62382, 134 | "filter": "date ge 1969-12-31T16:11:38.0000000+00:00 and date le 1976-08-09T12:41:58.3750000+00:00" 135 | }, 136 | // more partitions in the same format as above 137 | ] } 138 | ``` 139 | 140 | The JSON file contains metadata about the index and the partitions the command created, such as the total document count and the partition field name. The `partitions` field lists all the [filters](https://learn.microsoft.com/azure/search/search-filters) used to retrieve the partitions using [pagination](https://learn.microsoft.com/azure/search/search-pagination-page-layout#paging-results).
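Each `filter` value is a plain [OData filter expression](https://learn.microsoft.com/azure/search/search-filters), so a single partition can also be replayed directly with the SDK. The following is a minimal sketch, assuming an existing `SearchClient` named `searchClient` and the first partition shown above; the `export-partitions` command below automates this, including paging:

```csharp
// Sketch: fetch one partition's documents by replaying its filter.
// Ordering by the partition field keeps paging deterministic.
var options = new SearchOptions
{
    Filter = "date ge 1969-12-31T16:11:38.0000000+00:00 and date le 1976-08-09T12:41:58.3750000+00:00",
    Size = 62382 // the partition's documentCount
};
options.OrderBy.Add("date asc");
SearchResults<SearchDocument> results =
    await searchClient.SearchAsync<SearchDocument>(string.Empty, options);
```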
141 | 142 | #### export-partitions 143 | 144 | The `export-partitions` command is used to export the partitions created by `partition-index` into JSON files. 145 | 146 | ``` 147 | Description: 148 | Exports data from a search index using a pre-generated partition file from partition-index 149 | 150 | Usage: 151 | export-data export-partitions [options] 152 | 153 | Options: 154 | --partition-path (REQUIRED) Path of the file with JSON description of partitions. Should end in .json. 155 | --admin-key Admin key to the search service to export data from. If not specified - uses your Entra identity 156 | --export-path Directory to write JSON Lines partition files to. Every line in the partition file contains a JSON object with the contents of the Search document. Format of file names is --documents.json [default: .] 157 | --concurrent-partitions Number of partitions to concurrently export. Default is 2 [default: 2] 158 | --page-size Page size to use when running export queries. Default is 1000 [default: 1000] 159 | --include-partition List of partitions by index to include in the export. Example: --include-partition 0 --include-partition 1 only runs the export on first 2 partitions [] 160 | --exclude-partition List of partitions by index to exclude from the export. Example: --exclude-partition 0 --exclude-partition 1 runs the export on every partition except the first 2 [] 161 | --include-field List of fields to include in the export. Example: --include-field field1 --include-field field2. [] 162 | --exclude-field List of fields to exclude in the export. Example: --exclude-field field1 --exclude-field field2. [] 163 | -?, -h, --help Show help and usage information 164 | ``` 165 | 166 | Sample usage: 167 | 168 | ```cmd 169 | dotnet run export-partitions --partition-path my-index-partitions.json --admin-key AAAAAAA --export-path C:\Users\MyAccount\output --concurrent-partitions 8 170 | Starting partition 2 171 | Starting partition 1 172 | Starting partition 0 173 | Starting partition 3 174 | Starting partition 7 175 | Starting partition 4 176 | Starting partition 5 177 | Starting partition 6 178 | Ended partition 4 179 | Ended partition 6 180 | Ended partition 3 181 | Ended partition 0 182 | Ended partition 7 183 | Ended partition 2 184 | Ended partition 1 185 | Ended partition 5 186 | ``` 187 | 188 | The `export-partitions` command was run on partitions in the `my-index-partitions.json` file, which was output by the previous `partition-index` command. `--concurrent-partitions` was set to 8, so 8 partitions in this file were loaded into JSON files concurrently. This number can be changed to customize parallelization. Higher numbers increase load on the search service but complete the export more quickly. Lower numbers use fewer resources but take longer to complete the export. 189 | 190 | One JSON file per partition is output, with the file name formatted as `index-partition_index-documents.json`. The output [JSONL files](https://jsonlines.org/) have one JSON object per line, corresponding to a single search document. All fields marked as [retrievable](https://learn.microsoft.com/azure/search/search-query-simple-examples) are exported by default. 
Fields can be either explicitly included using `--include-field`, or explicitly excluded using `--exclude-field`. 191 | 192 | Example output in `index-0-documents.json`: 193 | 194 | ```json 195 | {"id":"document-1", "text": "first document", "date":"1969-12-31T16:11:38Z"} 196 | {"id":"document-2","text": "second document", "date":"1969-12-31T17:05:39Z"} 197 | ... 198 | ``` 199 | 200 | ### Continuous export commands 201 | 202 | #### export-continuous 203 | 204 | The `export-continuous` command finds documents that have not yet been exported and writes them to a JSON Lines file. 205 | 206 | ``` 207 | Description: 208 | Exports data from a search service by adding a column to track which documents have been exported and continually updating it 209 | 210 | Usage: 211 | export-data export-continuous [options] 212 | 213 | Options: 214 | --endpoint (REQUIRED) Endpoint of the search service to export data from. Example: https://example.search.windows.net 215 | --admin-key Admin key to the search service to export data from. If not specified - uses your Entra identity 216 | --index-name (REQUIRED) Name of the index to export data from 217 | --export-field-name Name of the Edm.Boolean field the continuous export process will update to track which documents have been exported. Default is 'exported' [default: exported] 218 | --page-size Page size to use when running export queries. Default is 1000 [default: 1000] 219 | --export-path Path to write JSON Lines file to. Every line in the file contains a JSON object with the contents of the Search document. Format of file is -documents.json [] 220 | --include-field List of fields to include in the export. Example: --include-field field1 --include-field field2. [] 221 | --exclude-field List of fields to exclude in the export. Example: --exclude-field field1 --exclude-field field2. [] 222 | -?, -h, --help Show help and usage information 223 | ``` 224 | 225 | Sample usage: 226 | 227 | ``` 228 | dotnet run export-continuous --endpoint https://example.search.windows.net --admin-key AAAA --index-name my-index 229 | ``` 230 | 231 | One JSON file is output, with the file name formatted as `my-index-documents.json`. The output [JSONL file](https://jsonlines.org/) has one JSON object per line, corresponding to a single search document. All fields marked as [retrievable](https://learn.microsoft.com/azure/search/search-query-simple-examples) are exported by default, except the field used to track whether the document has been exported. Fields can be either explicitly included using `--include-field`, or explicitly excluded using `--exclude-field`. If the export is cancelled, rerunning the command resumes where it left off. 232 | 233 | Duplicate documents may be included in the exported data. If the search service has multiple replicas, a [best-effort attempt is made to use the same replica](https://learn.microsoft.com/azure/search/index-similarity-and-scoring#scoring-statistics-and-sticky-sessions) to ensure consistent export results. There may also [be a delay in updating already exported documents](https://learn.microsoft.com/rest/api/searchservice/addupdate-or-delete-documents#response), so documents may be exported more than once. Storage usage also increases as additional data is added to the index. If duplicate documents or storage limits are an issue, partitioned export is recommended; alternatively, the output can be de-duplicated after the fact, as shown below. 
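The following minimal sketch de-duplicates an exported JSON Lines file on the document key after the export finishes. It assumes the key field is named `id` (adjust for your index) and keeps the last exported version of each document:

```csharp
using System.Text.Json;

// Sketch: de-duplicate an exported JSON Lines file on the "id" key,
// keeping the last exported version of each document.
var latest = new Dictionary<string, string>();
foreach (string line in File.ReadLines("my-index-documents.json"))
{
    using JsonDocument doc = JsonDocument.Parse(line);
    latest[doc.RootElement.GetProperty("id").GetString()] = line;
}
File.WriteAllLines("my-index-documents-deduped.json", latest.Values);
```

This trades memory for simplicity; for very large exports, sort the file by key with external tooling instead.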
234 | 235 | Example output in `my-index-documents.json`: 236 | 237 | ```json 238 | {"id":"document-1", "text": "first document"} 239 | {"id":"document-2","text": "second document"} 240 | ``` 241 | 242 | ## Next steps 243 | 244 | You can learn more about Azure AI Search on the [official documentation site](https://docs.microsoft.com/azure/search). 245 | -------------------------------------------------------------------------------- /export-data/Sample/Configuration.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.Extensions.Configuration; 2 | 3 | namespace Sample 4 | { 5 | public class Configuration 6 | { 7 | /// 8 | /// Service endpoint for the search service 9 | /// e.g. "https://your-search-service.search.windows.net 10 | /// 11 | [ConfigurationKeyName("AZURE_SEARCH_SERVICE_ENDPOINT")] 12 | public string ServiceEndpoint { get; set; } 13 | 14 | /// 15 | /// Index name in the search service 16 | /// e.g. sample-index 17 | /// 18 | [ConfigurationKeyName("AZURE_SEARCH_INDEX_NAME")] 19 | public string IndexName { get; set; } 20 | 21 | /// 22 | /// Admin API key for search service 23 | /// Optional, if not specified attempt to use DefaultAzureCredential 24 | /// 25 | [ConfigurationKeyName("AZURE_SEARCH_ADMIN_KEY")] 26 | public string AdminKey { get; set; } 27 | 28 | /// 29 | /// Directory to save the exported files in 30 | /// 31 | [ConfigurationKeyName("EXPORT_DIRECTORY")] 32 | public string ExportDirectory { get; set; } 33 | 34 | /// 35 | /// Validate the configuration 36 | /// 37 | /// If any parameters are invalid 38 | public void Validate() 39 | { 40 | if (!Uri.TryCreate(ServiceEndpoint, UriKind.Absolute, out _)) 41 | { 42 | throw new ArgumentException("Must specify service endpoint", nameof(ServiceEndpoint)); 43 | } 44 | 45 | if (string.IsNullOrEmpty(IndexName)) 46 | { 47 | throw new ArgumentException("Must specify index name", nameof(IndexName)); 48 | } 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /export-data/Sample/Document.cs: -------------------------------------------------------------------------------- 1 | using Azure.Search.Documents.Indexes; 2 | 3 | namespace Sample 4 | { 5 | public class Document 6 | { 7 | [SimpleField(IsKey = true, IsFilterable = true)] 8 | public string Id { get; set; } 9 | 10 | [SimpleField(IsFilterable = true, IsSortable = true)] 11 | public DateTimeOffset Timestamp { get; set; } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /export-data/Sample/Program.cs: -------------------------------------------------------------------------------- 1 | using Azure.Search.Documents; 2 | using Azure.Identity; 3 | using Microsoft.Extensions.Configuration; 4 | using Sample; 5 | using Azure.Search.Documents.Indexes; 6 | using Azure; 7 | using Azure.Search.Documents.Indexes.Models; 8 | using export_data; 9 | 10 | // Before running this sample 11 | // 1. Copy local.settings-example.json to local.settings.json 12 | // 2. Fill in the sample values with actual values 13 | var configuration = new Configuration(); 14 | new ConfigurationBuilder() 15 | .SetBasePath(Directory.GetCurrentDirectory()) 16 | .AddJsonFile("local.settings.json") 17 | .Build() 18 | .Bind(configuration); 19 | configuration.Validate(); 20 | 21 | var endpoint = new Uri(configuration.ServiceEndpoint); 22 | var defaultCredential = new DefaultAzureCredential(); 23 | var adminKey = !string.IsNullOrEmpty(configuration.AdminKey) ? 
new AzureKeyCredential(configuration.AdminKey) : null; 24 | var searchIndexClient = adminKey != null ? new SearchIndexClient(endpoint, adminKey) : new SearchIndexClient(endpoint, defaultCredential); 25 | 26 | var fieldBuilder = new FieldBuilder(); 27 | var searchFields = fieldBuilder.Build(typeof(Document)); 28 | var indexDefinition = new SearchIndex(configuration.IndexName, searchFields); 29 | await searchIndexClient.CreateOrUpdateIndexAsync(indexDefinition); 30 | 31 | var searchClient = searchIndexClient.GetSearchClient(indexDefinition.Name); 32 | 33 | // Upload randomly generated documents 34 | using (var bufferedSender = new SearchIndexingBufferedSender<Document>(searchClient)) 35 | { 36 | const int DocumentCount = 500000; 37 | DateTimeOffset start = new DateTimeOffset(2023, 01, 01, 0, 0, 0, TimeSpan.Zero); 38 | DateTimeOffset end = new DateTimeOffset(2024, 01, 01, 0, 0, 0, TimeSpan.Zero); 39 | var random = new Random(); 40 | for (int i = 0; i < DocumentCount; i++) 41 | { 42 | bufferedSender.UploadDocuments( 43 | new[] { 44 | new Document { 45 | Id = Convert.ToString(i), 46 | // Pick a random timestamp uniformly distributed between the start and end dates 47 | Timestamp = start + (random.NextDouble() * (end-start)) 48 | } 49 | }); 50 | } 51 | } 52 | 53 | // Demonstrate how to use partition export 54 | SearchField timestampField = searchFields 55 | .Where(field => field.Type == SearchFieldDataType.DateTimeOffset) 56 | .Single(); 57 | object lowerBound = await Bound.FindLowerBoundAsync(timestampField, searchClient); 58 | object upperBound = await Bound.FindUpperBoundAsync(timestampField, searchClient); 59 | List<Partition> partitions = await new PartitionGenerator(searchClient, timestampField, lowerBound, upperBound).GeneratePartitions(); 60 | 61 | var partitionFile = new PartitionFile 62 | { 63 | Endpoint = endpoint.AbsoluteUri, 64 | IndexName = indexDefinition.Name, 65 | FieldName = timestampField.Name, 66 | TotalDocumentCount = partitions.Sum(partition => partition.DocumentCount), 67 | Partitions = partitions 68 | }; 69 | 70 | if (!Directory.Exists(configuration.ExportDirectory)) 71 | { 72 | Directory.CreateDirectory(configuration.ExportDirectory); 73 | } 74 | var partitionFilePath = Path.Combine(configuration.ExportDirectory, $"{indexDefinition.Name}-partitions.json"); 75 | partitionFile.SerializeToFile(partitionFilePath); 76 | 77 | var partitionWriter = new FilePartitionWriter(configuration.ExportDirectory, indexDefinition.Name); 78 | await new PartitionExporter( 79 | partitionFile, 80 | partitionWriter, 81 | searchClient, 82 | indexDefinition, 83 | concurrentPartitions: 2, 84 | pageSize: 1000).ExportAsync(); -------------------------------------------------------------------------------- /export-data/Sample/Sample.csproj: -------------------------------------------------------------------------------- 1 | <Project Sdk="Microsoft.NET.Sdk"> 2 | 3 | <PropertyGroup> 4 | <OutputType>Exe</OutputType> 5 | <TargetFramework>net6.0</TargetFramework> 6 | <ImplicitUsings>enable</ImplicitUsings> 7 | <Nullable>disable</Nullable> 8 | </PropertyGroup> 9 | 10 | <ItemGroup> <!-- package versions were not captured in this listing --> 11 | <PackageReference Include="Azure.Identity" /> 12 | <PackageReference Include="Microsoft.Extensions.Configuration.Binder" /> 13 | <PackageReference Include="Microsoft.Extensions.Configuration.Json" /> 14 | </ItemGroup> 15 | 16 | <ItemGroup> 17 | <ProjectReference Include="..\export-data\export-data.csproj" /> 18 | </ItemGroup> 19 | <ItemGroup> 20 | <None Update="local.settings.json"> 21 | <CopyToOutputDirectory>Always</CopyToOutputDirectory> 22 | </None> 23 | </ItemGroup> 24 | 25 | </Project> 26 | -------------------------------------------------------------------------------- /export-data/Sample/local.settings-example.json: -------------------------------------------------------------------------------- 1 | { 2 | "AZURE_SEARCH_SERVICE_ENDPOINT": "https://my-service-endpoint.search.windows.net", 3 | "AZURE_SEARCH_INDEX_NAME": "my-example-index-name", 4 | "AZURE_SEARCH_ADMIN_KEY": "my-admin-key", 5 | "EXPORT_DIRECTORY": "sample-export" 6 | } 7 | -------------------------------------------------------------------------------- 
/export-data/export-data.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.3.32929.385 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "export-data", "export-data\export-data.csproj", "{DDF5C0C5-8499-453A-AA06-368A9E218262}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "tests", "tests\tests.csproj", "{EBF0AE26-F3AB-4FF7-8197-75F17D7F9402}" 9 | EndProject 10 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Sample", "Sample\Sample.csproj", "{CBE2C21F-50C2-4CC2-82E0-263EE265612C}" 11 | EndProject 12 | Global 13 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 14 | Debug|Any CPU = Debug|Any CPU 15 | Release|Any CPU = Release|Any CPU 16 | EndGlobalSection 17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 18 | {DDF5C0C5-8499-453A-AA06-368A9E218262}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 19 | {DDF5C0C5-8499-453A-AA06-368A9E218262}.Debug|Any CPU.Build.0 = Debug|Any CPU 20 | {DDF5C0C5-8499-453A-AA06-368A9E218262}.Release|Any CPU.ActiveCfg = Release|Any CPU 21 | {DDF5C0C5-8499-453A-AA06-368A9E218262}.Release|Any CPU.Build.0 = Release|Any CPU 22 | {EBF0AE26-F3AB-4FF7-8197-75F17D7F9402}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 23 | {EBF0AE26-F3AB-4FF7-8197-75F17D7F9402}.Debug|Any CPU.Build.0 = Debug|Any CPU 24 | {EBF0AE26-F3AB-4FF7-8197-75F17D7F9402}.Release|Any CPU.ActiveCfg = Release|Any CPU 25 | {EBF0AE26-F3AB-4FF7-8197-75F17D7F9402}.Release|Any CPU.Build.0 = Release|Any CPU 26 | {CBE2C21F-50C2-4CC2-82E0-263EE265612C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 27 | {CBE2C21F-50C2-4CC2-82E0-263EE265612C}.Debug|Any CPU.Build.0 = Debug|Any CPU 28 | {CBE2C21F-50C2-4CC2-82E0-263EE265612C}.Release|Any CPU.ActiveCfg = Release|Any CPU 29 | {CBE2C21F-50C2-4CC2-82E0-263EE265612C}.Release|Any CPU.Build.0 = Release|Any CPU 30 | EndGlobalSection 31 | GlobalSection(SolutionProperties) = preSolution 32 | HideSolutionNode = FALSE 33 | EndGlobalSection 34 | GlobalSection(ExtensibilityGlobals) = postSolution 35 | SolutionGuid = {4B25BA6A-936B-47CB-9896-89F6A884EAC5} 36 | EndGlobalSection 37 | EndGlobal 38 | -------------------------------------------------------------------------------- /export-data/export-data/Bound.cs: -------------------------------------------------------------------------------- 1 | using Azure.Search.Documents.Indexes.Models; 2 | using Azure.Search.Documents.Models; 3 | using Azure.Search.Documents; 4 | 5 | namespace export_data 6 | { 7 | /// 8 | /// Potential value for the sortable and filterable field, used as a bound between different potential partitions 9 | /// 10 | public static class Bound 11 | { 12 | public static async Task FindUpperBoundAsync(SearchField field, SearchClient searchClient) => 13 | DeserializeBound(field, await FindUpperBoundDocumentAsync(field, searchClient)); 14 | 15 | /// 16 | /// Find the largest value of the sortable and filterable field in the index 17 | /// 18 | public static async Task FindUpperBoundDocumentAsync(SearchField field, SearchClient searchClient) 19 | { 20 | var upperBoundOptions = new SearchOptions(); 21 | upperBoundOptions.Select.Add(field.Name); 22 | upperBoundOptions.Size = 1; 23 | upperBoundOptions.OrderBy.Add($"{field.Name} desc"); 24 | SearchResults upperBoundResults = await searchClient.SearchAsync( 25 | searchText: string.Empty, 26 | options: upperBoundOptions); 27 | 
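// The bound query returns at most one result; a null document means no value was found for the field.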
SearchDocument upperBoundDocument = await GetFirstResultAsync(upperBoundResults); 28 | if (upperBoundDocument == null) 29 | { 30 | throw new ArgumentException($"Could not find largest value for field {field.Name}"); 31 | } 32 | return upperBoundDocument; 33 | } 34 | 35 | public static async Task FindLowerBoundAsync(SearchField field, SearchClient searchClient) => 36 | DeserializeBound(field, await FindLowerBoundDocumentAsync(field, searchClient)); 37 | 38 | /// 39 | /// Find the smallest value of the sortable and filterable field in the index 40 | /// 41 | public static async Task FindLowerBoundDocumentAsync(SearchField field, SearchClient searchClient) 42 | { 43 | var lowerBoundOptions = new SearchOptions(); 44 | lowerBoundOptions.Select.Add(field.Name); 45 | lowerBoundOptions.Size = 1; 46 | lowerBoundOptions.OrderBy.Add($"{field.Name} asc"); 47 | SearchResults lowerBoundResults = await searchClient.SearchAsync( 48 | searchText: string.Empty, 49 | options: lowerBoundOptions); 50 | SearchDocument lowerBoundDocument = await GetFirstResultAsync(lowerBoundResults); 51 | if (lowerBoundDocument == null) 52 | { 53 | throw new ArgumentException($"Could not find smallest value for field {field.Name}"); 54 | } 55 | 56 | return lowerBoundDocument; 57 | } 58 | 59 | public static async Task GetFirstResultAsync(SearchResults results) 60 | { 61 | await foreach (SearchResult result in results.GetResultsAsync()) 62 | { 63 | return result.Document; 64 | } 65 | 66 | return null; 67 | } 68 | 69 | public static object DeserializeBound(SearchField field, SearchDocument document) 70 | { 71 | if (field.Type == SearchFieldDataType.DateTimeOffset) 72 | { 73 | return document.GetDateTimeOffset(field.Name); 74 | } 75 | 76 | throw new InvalidOperationException($"Unexpected field type {field.Type}"); 77 | } 78 | 79 | public static object DeserializeBound(SearchFieldDataType fieldType, string bound) 80 | { 81 | if (fieldType == SearchFieldDataType.DateTimeOffset) 82 | { 83 | return DateTimeOffset.Parse(bound); 84 | } 85 | 86 | throw new InvalidOperationException($"Unexpected field type {fieldType}"); 87 | } 88 | 89 | public static string SerializeBound(object bound) 90 | { 91 | if (bound is DateTimeOffset boundDate) 92 | { 93 | return boundDate.ToString("o"); 94 | } 95 | 96 | return Convert.ToString(bound); 97 | } 98 | 99 | // Strategy: If the partition's lower bound is the lowest possible value of the field, include it in the partition 100 | // Otherwise, exclude it from the partition 101 | // Always include the highest value in the partition 102 | // To learn more about filter syntax, please visit https://learn.microsoft.com/azure/search/search-filters 103 | public static string GenerateBoundFilter(string field, object lowestBound, object partitionLowerBound, object partitionUpperBound) 104 | { 105 | string lowerBoundFilter = partitionLowerBound.Equals(lowestBound) ? 
"ge" : "gt"; 106 | return $"{field} {lowerBoundFilter} {SerializeBound(partitionLowerBound)} and {field} le {SerializeBound(partitionUpperBound)}"; 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /export-data/export-data/ContinuousExporter.cs: -------------------------------------------------------------------------------- 1 | using Azure; 2 | using Azure.Search.Documents; 3 | using Azure.Search.Documents.Indexes; 4 | using Azure.Search.Documents.Indexes.Models; 5 | using Azure.Search.Documents.Models; 6 | using System.Text.Json; 7 | 8 | namespace export_data 9 | { 10 | /// 11 | /// Exports data continuously from an index, updating the documents when they have been exported 12 | /// 13 | public class ContinuousExporter : Exporter 14 | { 15 | private readonly SearchClient _searchClient; 16 | private readonly SearchIndexClient _searchIndexClient; 17 | private readonly string _exportFieldName; 18 | private readonly int _pageSize; 19 | private readonly string _exportPath; 20 | 21 | public ContinuousExporter(SearchClient searchClient, SearchIndex index, SearchIndexClient searchIndexClient, string exportFieldName, int pageSize, string exportPath, IEnumerable fieldsToInclude, ISet fieldsToExclude) : base(index, fieldsToInclude, fieldsToExclude) 22 | { 23 | _searchClient = searchClient; 24 | _searchIndexClient = searchIndexClient; 25 | _exportFieldName = exportFieldName; 26 | _pageSize = pageSize; 27 | _exportPath = exportPath; 28 | } 29 | 30 | public override async Task ExportAsync() 31 | { 32 | await EnsureExportColumnExists(); 33 | SearchField keyField = Index.Fields.First(field => field.IsKey ?? false); 34 | 35 | var exportUpdateOptions = new IndexDocumentsOptions { ThrowOnAnyError = true }; 36 | 37 | var options = new SearchOptions 38 | { 39 | Size = _pageSize, 40 | // Set SessionId to target the same replica to retrieve consistent results 41 | // To learn more, please visit https://learn.microsoft.com/azure/search/index-similarity-and-scoring#scoring-statistics-and-sticky-sessions 42 | SessionId = Guid.NewGuid().ToString() 43 | }; 44 | options.OrderBy.Add($"{_exportFieldName} asc"); 45 | AddSelect(options, _exportFieldName); 46 | 47 | Console.WriteLine("Starting continuous export..."); 48 | using FileStream exportOutput = File.Open(_exportPath, FileMode.Append, FileAccess.Write, FileShare.Read); 49 | bool firstDocumentExported = false; 50 | do 51 | { 52 | SearchResults searchResults = await _searchClient.SearchAsync(searchText: string.Empty, options: options); 53 | await foreach (Page> resultPage in searchResults.GetResultsAsync().AsPages()) 54 | { 55 | SearchResult firstResult = resultPage.Values.FirstOrDefault(); 56 | if (firstResult == null) 57 | { 58 | firstDocumentExported = true; 59 | break; 60 | } 61 | 62 | 63 | if (firstResult.Document.TryGetValue(_exportFieldName, out object exportValue) && 64 | exportValue is bool isExported && 65 | isExported) 66 | { 67 | firstDocumentExported = true; 68 | break; 69 | } 70 | 71 | var exportedUpdates = new List(); 72 | foreach (SearchResult searchResult in resultPage.Values) 73 | { 74 | searchResult.Document.Remove(_exportFieldName); 75 | JsonSerializer.Serialize(exportOutput, searchResult.Document); 76 | exportOutput.WriteByte((byte)'\n'); 77 | 78 | exportedUpdates.Add(new SearchDocument 79 | { 80 | [keyField.Name] = searchResult.Document[keyField.Name], 81 | [_exportFieldName] = true 82 | }); 83 | } 84 | 85 | if (exportedUpdates.Any()) 86 | { 87 | // Delays in being able to search 
updates may cause already exported documents to be re-exported 88 | // To learn more, please see https://learn.microsoft.com/rest/api/searchservice/addupdate-or-delete-documents#response 89 | await _searchClient.MergeOrUploadDocumentsAsync(exportedUpdates, exportUpdateOptions); 90 | Console.WriteLine($"Exported {exportedUpdates.Count} documents"); 91 | } 92 | } 93 | } 94 | while (!firstDocumentExported); 95 | 96 | Console.WriteLine("Finished continuous export"); 97 | } 98 | 99 | private async Task EnsureExportColumnExists() 100 | { 101 | SearchField exportField = Index.Fields.FirstOrDefault(field => field.Name == _exportFieldName); 102 | if (exportField == null) 103 | { 104 | exportField = new SearchField(_exportFieldName, SearchFieldDataType.Boolean) 105 | { 106 | IsSortable = true, 107 | }; 108 | Index.Fields.Add(exportField); 109 | await _searchIndexClient.CreateOrUpdateIndexAsync(Index); 110 | } 111 | else if (exportField.Type != SearchFieldDataType.Boolean) 112 | { 113 | throw new ArgumentException($"Export field {exportField.Name} has unexpected type {exportField.Type}, must be {SearchFieldDataType.Boolean}"); 114 | } 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /export-data/export-data/Exporter.cs: -------------------------------------------------------------------------------- 1 | using Azure.Search.Documents; 2 | using Azure.Search.Documents.Indexes.Models; 3 | 4 | namespace export_data 5 | { 6 | /// 7 | /// Base class for exporting data from an index 8 | /// 9 | public abstract class Exporter 10 | { 11 | /// 12 | /// What fields to include in the exported data 13 | /// 14 | protected IEnumerable FieldsToInclude { get; } 15 | 16 | /// 17 | /// What fields to exclude from the exported data 18 | /// 19 | protected ISet FieldsToExclude { get; } 20 | 21 | protected SearchIndex Index { get; } 22 | 23 | public Exporter(SearchIndex index, IEnumerable fieldsToInclude, ISet fieldsToExclude) 24 | { 25 | Index = index; 26 | FieldsToInclude = fieldsToInclude; 27 | FieldsToExclude = fieldsToExclude; 28 | } 29 | 30 | // Export data from the search index 31 | public abstract Task ExportAsync(); 32 | 33 | // Update the $select clause in the query to pick the fields requested 34 | // Learn more at https://learn.microsoft.com/azure/search/search-query-odata-select 35 | protected void AddSelect(SearchOptions options, params string[] requiredFields) 36 | { 37 | if (FieldsToInclude?.Any() ?? false) 38 | { 39 | foreach (string field in FieldsToInclude) 40 | { 41 | options.Select.Add(field); 42 | } 43 | } 44 | else if (FieldsToExclude?.Any() ?? 
false) 45 | { 46 | foreach (string field in Index.Fields.Select(field => field.Name)) 47 | { 48 | if (!FieldsToExclude.Contains(field)) 49 | { 50 | options.Select.Add(field); 51 | } 52 | } 53 | } 54 | 55 | // If there are any required fields and we have specified either included or excluded fields, 56 | // ensure the required fields are present 57 | if (requiredFields.Length > 0 && options.Select.Any()) 58 | { 59 | foreach (string requiredField in requiredFields) 60 | { 61 | if (!options.Select.Contains(requiredField)) 62 | { 63 | options.Select.Add(requiredField); 64 | } 65 | } 66 | } 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /export-data/export-data/FilePartitionWriter.cs: -------------------------------------------------------------------------------- 1 | using Azure; 2 | using Azure.Search.Documents; 3 | using Azure.Search.Documents.Models; 4 | using System; 5 | using System.Collections.Generic; 6 | using System.CommandLine; 7 | using System.Linq; 8 | using System.Text; 9 | using System.Text.Json; 10 | using System.Threading.Tasks; 11 | 12 | namespace export_data 13 | { 14 | public class FilePartitionWriter : IPartitionWriter 15 | { 16 | private readonly string _directory; 17 | private readonly string _indexName; 18 | 19 | public FilePartitionWriter(string directory, string indexName) 20 | { 21 | _directory = directory; 22 | _indexName = indexName; 23 | } 24 | 25 | public async Task WritePartitionAsync(int partitionId, SearchResults searchResults, CancellationToken cancellationToken, int? pageSizeHint = null) 26 | { 27 | if (!Directory.Exists(_directory)) 28 | { 29 | Directory.CreateDirectory(_directory); 30 | } 31 | 32 | string exportPath = Path.Combine(_directory, $"{_indexName}-{partitionId}-documents.json"); 33 | using FileStream exportOutput = File.Open(exportPath, FileMode.OpenOrCreate, FileAccess.Write, FileShare.Read); 34 | 35 | await foreach (Page> resultPage in searchResults.GetResultsAsync().AsPages(pageSizeHint: pageSizeHint)) 36 | { 37 | foreach (SearchResult searchResult in resultPage.Values) 38 | { 39 | JsonSerializer.Serialize(exportOutput, searchResult.Document); 40 | exportOutput.WriteByte((byte)'\n'); 41 | } 42 | } 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /export-data/export-data/IPartitionWriter.cs: -------------------------------------------------------------------------------- 1 | using Azure.Search.Documents.Models; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | namespace export_data 9 | { 10 | public interface IPartitionWriter 11 | { 12 | public Task WritePartitionAsync(int partitionId, SearchResults searchResults, CancellationToken cancellationToken, int? 
pageSizeHint = null); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /export-data/export-data/Partition.cs: -------------------------------------------------------------------------------- 1 | namespace export_data 2 | { 3 | /// 4 | /// Represents a sub-partition of a search index 5 | /// 6 | public record Partition : IComparable 7 | { 8 | // Highest value included in this partition 9 | public object UpperBound { get; init; } 10 | 11 | // Lowest value, might be included in this partition 12 | public object LowerBound { get; init; } 13 | 14 | // Approximate document count included in this partition 15 | public long DocumentCount { get; init; } 16 | 17 | // Filter query string used to retrieve this partition 18 | // To learn more, please visit https://learn.microsoft.com/azure/search/search-filters 19 | public string Filter { get; init; } 20 | 21 | public int CompareTo(Partition other) 22 | { 23 | if (LowerBound is DateTimeOffset lowerBoundDate && 24 | other.LowerBound is DateTimeOffset otherLowerBoundDate) 25 | { 26 | return lowerBoundDate.CompareTo(otherLowerBoundDate); 27 | } 28 | 29 | throw new InvalidOperationException($"Unexpected lower bound type {LowerBound.GetType()}, other lower bound type {other.LowerBound.GetType()}"); 30 | } 31 | 32 | public Partition Merge(Partition other, string field, object lowestBound) 33 | { 34 | if (UpperBound != other.LowerBound) 35 | { 36 | throw new InvalidOperationException("Cannot merge partitions where upper bound does not match next lower bound"); 37 | } 38 | 39 | return new Partition 40 | { 41 | LowerBound = LowerBound, 42 | UpperBound = other.UpperBound, 43 | DocumentCount = DocumentCount + other.DocumentCount, 44 | Filter = Bound.GenerateBoundFilter(field, lowestBound, partitionLowerBound: LowerBound, partitionUpperBound: other.LowerBound) 45 | }; 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /export-data/export-data/PartitionExporter.cs: -------------------------------------------------------------------------------- 1 | using Azure.Search.Documents; 2 | using Azure.Search.Documents.Indexes.Models; 3 | using Azure.Search.Documents.Models; 4 | using System.Collections.Concurrent; 5 | 6 | namespace export_data 7 | { 8 | /// 9 | /// Export documents partitioned by a sortable and filterable field in the index 10 | /// 11 | public class PartitionExporter : Exporter 12 | { 13 | private readonly PartitionFile _partitionFile; 14 | private readonly SearchClient _searchClient; 15 | private readonly IPartitionWriter _partitionWriter; 16 | private readonly int _concurrentPartitions; 17 | private readonly int _pageSize; 18 | private readonly int[] _partitionIdsToInclude; 19 | private readonly ISet _partitionIdsToExclude; 20 | 21 | public PartitionExporter(PartitionFile partitionFile, IPartitionWriter partitionWriter, SearchClient searchClient, SearchIndex index, int concurrentPartitions, int pageSize, int[] partitionIdsToInclude = null, ISet partitionIdsToExclude = null, string[] fieldsToInclude = null, ISet fieldsToExclude = null) : base(index, fieldsToInclude, fieldsToExclude) 22 | { 23 | _partitionFile = partitionFile; 24 | _partitionWriter = partitionWriter; 25 | _searchClient = searchClient; 26 | _concurrentPartitions = concurrentPartitions; 27 | _pageSize = pageSize; 28 | _partitionIdsToInclude = partitionIdsToInclude; 29 | _partitionIdsToExclude = partitionIdsToExclude; 30 | } 31 | 32 | public override async Task ExportAsync() 33 | { 34 | 
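// Queue every partition to export, then drain the queue with a fixed number of concurrent worker tasks; a failure in any worker cancels the remaining work.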
var cancellationTokenSource = new CancellationTokenSource(); 35 | var partitions = new ConcurrentQueue(); 36 | if (_partitionIdsToInclude != null && _partitionIdsToInclude.Length > 0) 37 | { 38 | foreach (int id in _partitionIdsToInclude) 39 | { 40 | partitions.Enqueue(new PartitionToExport { Id = id, Partition = _partitionFile.Partitions[id] }); 41 | } 42 | } 43 | else 44 | { 45 | for (int id = 0; id < _partitionFile.Partitions.Count; id++) 46 | { 47 | if (_partitionIdsToExclude == null || 48 | (_partitionIdsToExclude != null && !_partitionIdsToExclude.Contains(id))) 49 | { 50 | partitions.Enqueue(new PartitionToExport { Id = id, Partition = _partitionFile.Partitions[id] }); 51 | } 52 | } 53 | } 54 | 55 | var exporters = new Task[_concurrentPartitions]; 56 | for (int i = 0; i < exporters.Length; i++) 57 | { 58 | exporters[i] = Task.Run(async () => 59 | { 60 | while (!cancellationTokenSource.IsCancellationRequested && 61 | partitions.TryDequeue(out PartitionToExport nextPartition)) 62 | { 63 | Console.WriteLine($"Starting partition {nextPartition.Id}"); 64 | try 65 | { 66 | await ExportPartitionAsync(nextPartition.Id, nextPartition.Partition, cancellationTokenSource.Token); 67 | Console.WriteLine($"Ended partition {nextPartition.Id}"); 68 | } 69 | catch (Exception e) 70 | { 71 | Console.Error.Write(e.ToString()); 72 | cancellationTokenSource.Cancel(); 73 | } 74 | } 75 | }); 76 | } 77 | 78 | await Task.WhenAll(exporters); 79 | } 80 | 81 | private async Task ExportPartitionAsync(int partitionId, Partition partition, CancellationToken cancellationToken) 82 | { 83 | // Partitions being exported should have already been sub-partitioned into sizes less than 100k 84 | // This check exists because DocumentCount on a partition can theoretically be larger than the int max size 85 | int searchMaxSize = partition.DocumentCount > int.MaxValue ? int.MaxValue : (int)partition.DocumentCount; 86 | var options = new SearchOptions 87 | { 88 | Filter = partition.Filter, 89 | Size = searchMaxSize, 90 | Skip = 0 91 | }; 92 | AddSelect(options); 93 | options.OrderBy.Add($"{_partitionFile.FieldName} asc"); 94 | SearchResults searchResults = await _searchClient.SearchAsync(searchText: string.Empty, options: options, cancellationToken: cancellationToken); 95 | 96 | await _partitionWriter.WritePartitionAsync(partitionId, searchResults, cancellationToken); 97 | } 98 | 99 | private record PartitionToExport 100 | { 101 | public int Id { get; init; } 102 | 103 | public Partition Partition { get; init; } 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /export-data/export-data/PartitionFile.cs: -------------------------------------------------------------------------------- 1 | using System.Text.Json; 2 | 3 | namespace export_data 4 | { 5 | /// 6 | /// Record of all partitions to be exported from a search index 7 | /// 8 | public record PartitionFile 9 | { 10 | // Endpoint used to connect to the search service 11 | public string Endpoint { get; init; } 12 | 13 | // Name of the search index to export 14 | public string IndexName { get; init; } 15 | 16 | // Name of the sortable and filterable field to use to partition the index documents 17 | public string FieldName { get; init; } 18 | 19 | // Sum of all the partition approximate document counts 20 | public long TotalDocumentCount { get; init; } 21 | 22 | // List of all partitions. Sorted by lower bound. 
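// Partition ids passed to --include-partition and --exclude-partition are zero-based positions in this list.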
23 | public List Partitions { get; init; } 24 | 25 | public void SerializeToFile(string path) 26 | { 27 | File.WriteAllText(path, JsonSerializer.Serialize(this, options: Util.SerializerOptions)); 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /export-data/export-data/PartitionGenerator.cs: -------------------------------------------------------------------------------- 1 | using Azure.Search.Documents; 2 | using Azure.Search.Documents.Indexes.Models; 3 | using Azure.Search.Documents.Models; 4 | 5 | namespace export_data 6 | { 7 | /// 8 | /// Splits up a search index into smaller partitions 9 | /// 10 | /// 11 | /// Requires a sortable and filterable field. Max partition size is 100,000, to learn more please visit 12 | /// https://learn.microsoft.com/azure/search/search-pagination-page-layout#paging-results 13 | /// 14 | public class PartitionGenerator 15 | { 16 | // Max page size is 100,000 17 | private const long MaximumDocumentCount = 100000; 18 | private readonly long _partitionMaximumDocumentCount; 19 | // Search client for paging through results 20 | private readonly SearchClient _searchClient; 21 | // Sortable filterable field to partition documents 22 | private readonly SearchField _field; 23 | // Lowest value for the field. Documents with a field value less than this will not be partitioned 24 | private readonly object _lowerBound; 25 | // Highest value for the field. Documents with a field value greater than this will not be partitioned 26 | private readonly object _upperBound; 27 | 28 | public PartitionGenerator(SearchClient searchClient, SearchField field, object lowerBound, object upperBound, long partitionMaximumDocumentCount = -1) 29 | { 30 | _searchClient = searchClient; 31 | _field = field; 32 | _lowerBound = lowerBound; 33 | _upperBound = upperBound; 34 | _partitionMaximumDocumentCount = partitionMaximumDocumentCount > 0 ? 
partitionMaximumDocumentCount : MaximumDocumentCount; 35 | } 36 | 37 | public async Task> GeneratePartitions() 38 | { 39 | var partitions = new List(); 40 | var dataToPartition = new Stack(); 41 | dataToPartition.Push(await GeneratePartition(_lowerBound, _upperBound)); 42 | 43 | // Keep splitting the initial partition in half until all partitions are <= 100,000 documents 44 | while (dataToPartition.TryPop(out Partition nextPartition)) 45 | { 46 | if (nextPartition.DocumentCount <= _partitionMaximumDocumentCount) 47 | { 48 | partitions.Add(nextPartition); 49 | continue; 50 | } 51 | 52 | object midpoint = GetMidpoint(nextPartition.LowerBound, nextPartition.UpperBound); 53 | dataToPartition.Push(await GeneratePartition(nextPartition.LowerBound, midpoint)); 54 | dataToPartition.Push(await GeneratePartition(midpoint, nextPartition.UpperBound)); 55 | } 56 | 57 | // Then merge all the partitions back together to create larger ones 58 | return MergePartitions(partitions); 59 | } 60 | 61 | // Merges smaller partitions into the largest ones possible 62 | private List MergePartitions(List partitions) 63 | { 64 | partitions.Sort(); 65 | IEnumerator partitionEnumerator = partitions.GetEnumerator(); 66 | if (!partitionEnumerator.MoveNext()) 67 | { 68 | return partitions; 69 | } 70 | 71 | var mergedPartitions = new List(); 72 | Partition nextPartition = partitionEnumerator.Current; 73 | while (partitionEnumerator.MoveNext()) 74 | { 75 | Partition mergedPartition = nextPartition.Merge(partitionEnumerator.Current, _field.Name, _lowerBound); 76 | if (mergedPartition.DocumentCount > _partitionMaximumDocumentCount) 77 | { 78 | mergedPartitions.Add(nextPartition); 79 | nextPartition = partitionEnumerator.Current; 80 | } 81 | else 82 | { 83 | nextPartition = mergedPartition; 84 | } 85 | } 86 | mergedPartitions.Add(nextPartition); 87 | return mergedPartitions; 88 | } 89 | 90 | // Execute a filtered search against the index to generate a candidate partition 91 | private async Task GeneratePartition(object partitionLowerBound, object partitionUpperBound) 92 | { 93 | SearchOptions options = CreatePartitionSearchOptions(partitionLowerBound, partitionUpperBound); 94 | SearchResults partitionResults = await _searchClient.SearchAsync(searchText: string.Empty, options: options); 95 | if (!partitionResults.TotalCount.HasValue) 96 | { 97 | throw new InvalidOperationException("Expected results to have total count"); 98 | } 99 | 100 | return new Partition 101 | { 102 | LowerBound = partitionLowerBound, 103 | UpperBound = partitionUpperBound, 104 | DocumentCount = partitionResults.TotalCount.Value, 105 | Filter = options.Filter 106 | }; 107 | } 108 | 109 | private SearchOptions CreatePartitionSearchOptions(object partitionLowerBound, object partitionUpperBound) => 110 | new() 111 | { 112 | IncludeTotalCount = true, 113 | Size = 1, 114 | Filter = Bound.GenerateBoundFilter(_field.Name, _lowerBound, partitionLowerBound, partitionUpperBound) 115 | }; 116 | 117 | // Get a value for the sortable and filterable field between the lower and upper bound. 
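// Only DateTimeOffset bounds are currently supported: the midpoint is lower + (upper - lower) / 2.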
118 | public static object GetMidpoint(object lowerBound, object upperBound) 119 | { 120 | if (lowerBound is DateTimeOffset lowerBoundDate && upperBound is DateTimeOffset upperBoundDate) 121 | { 122 | return lowerBoundDate + ((upperBoundDate - lowerBoundDate) / 2); 123 | } 124 | 125 | throw new InvalidOperationException($"Unknown lower bound type {lowerBound.GetType()}, upper bound type {upperBound.GetType()}"); 126 | } 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /export-data/export-data/Program.cs: -------------------------------------------------------------------------------- 1 | using System.CommandLine; 2 | using System.Text.Json; 3 | using Azure; 4 | using Azure.Identity; 5 | using Azure.Search.Documents; 6 | using Azure.Search.Documents.Indexes; 7 | using Azure.Search.Documents.Indexes.Models; 8 | 9 | namespace export_data 10 | { 11 | public static class Program 12 | { 13 | private const int MaximumPartitionSize = 100000; 14 | 15 | // Supported field types for the partition bounds 16 | // To learn more about field types, please visit https://learn.microsoft.com/rest/api/searchservice/supported-data-types 17 | private static readonly SearchFieldDataType[] SupportedFieldTypes = new[] 18 | { 19 | SearchFieldDataType.DateTimeOffset 20 | }; 21 | 22 | public static async Task Main(string[] args) 23 | { 24 | // Setup command line arguments 25 | var endpointOption = new Option( 26 | name: "--endpoint", 27 | description: "Endpoint of the search service to export data from. Example: https://example.search.windows.net") 28 | { 29 | IsRequired = true, 30 | }; 31 | var adminKeyOption = new Option( 32 | name: "--admin-key", 33 | description: "Admin key to the search service to export data from. If not specified - uses your Entra identity") 34 | { 35 | IsRequired = false 36 | }; 37 | var indexOption = new Option( 38 | name: "--index-name", 39 | description: "Name of the index to export data from") 40 | { 41 | IsRequired = true 42 | }; 43 | var fieldOption = new Option( 44 | name: "--field-name", 45 | description: "Name of field used to partition the index data. This field must be filterable and sortable.") 46 | { 47 | IsRequired = true 48 | }; 49 | var upperBoundOption = new Option( 50 | name: "--upper-bound", 51 | description: "Largest value to use to partition the index data. Defaults to the largest value in the index.", 52 | getDefaultValue: () => null); 53 | var lowerBoundOption = new Option( 54 | name: "--lower-bound", 55 | description: "Smallest value to use to partition the index data. Defaults to the smallest value in the index.", 56 | getDefaultValue: () => null); 57 | var partitionFileOption = new Option( 58 | name: "--partition-path", 59 | description: "Path of the file with JSON description of partitions. Should end in .json. Default is -partitions.json", 60 | getDefaultValue: () => null); 61 | var partitionFileRequiredOption = new Option( 62 | name: "--partition-path", 63 | description: "Path of the file with JSON description of partitions. Should end in .json.") 64 | { 65 | IsRequired = true 66 | }; 67 | var exportDirectoryOption = new Option( 68 | name: "--export-path", 69 | description: "Directory to write JSON Lines partition files to. Every line in the partition file contains a JSON object with the contents of the Search document. 
Format of file names is --documents.json", 70 | getDefaultValue: () => "."); 71 | var concurrentPartitionsOption = new Option( 72 | name: "--concurrent-partitions", 73 | description: "Number of partitions to concurrently export. Default is 2", 74 | getDefaultValue: () => 2); 75 | var pageSizeOption = new Option( 76 | name: "--page-size", 77 | description: "Page size to use when running export queries. Default is 1000", 78 | getDefaultValue: () => 1000); 79 | var partitionSizeOption = new Option( 80 | name: "--partition-size", 81 | description: "Maximum size of a partition. Defaults to 100,000. Cannot exceed 100,000", 82 | getDefaultValue: () => MaximumPartitionSize); 83 | var includePartitionsOption = new Option( 84 | name: "--include-partition", 85 | description: "List of partitions by index to include in the export. Example: --include-partition 0 --include-partition 1 only runs the export on first 2 partitions", 86 | getDefaultValue: () => null); 87 | var excludePartitionsOption = new Option( 88 | name: "--exclude-partition", 89 | description: "List of partitions by index to exclude from the export. Example: --exclude-partition 0 --exclude-partition 1 runs the export on every partition except the first 2", 90 | getDefaultValue: () => null); 91 | var exportFieldOption = new Option( 92 | name: "--export-field-name", 93 | description: "Name of the Edm.Boolean field the continuous export process will update to track which documents have been exported. Default is 'exported'", 94 | getDefaultValue: () => "exported"); 95 | var exportFileOption = new Option( 96 | name: "--export-path", 97 | description: "Path to write JSON Lines file to. Every line in the file contains a JSON object with the contents of the Search document. Format of file is -documents.json", 98 | getDefaultValue: () => null); 99 | var includeFieldsOption = new Option( 100 | name: "--include-field", 101 | description: "List of fields to include in the export. Example: --include-field field1 --include-field field2.", 102 | getDefaultValue: () => null); 103 | var excludeFieldsOption = new Option( 104 | name: "--exclude-field", 105 | description: "List of fields to exclude in the export. Example: --exclude-field field1 --exclude-field field2.", 106 | getDefaultValue: () => null); 107 | 108 | var boundsCommand = new Command("get-bounds", "Find and display the largest and lowest value for the specified field. 
Used to determine how to partition index data for export") 109 | { 110 | endpointOption, 111 | adminKeyOption, 112 | indexOption, 113 | fieldOption 114 | }; 115 | boundsCommand.SetHandler(async (string endpoint, string adminKey, string indexName, string fieldName) => 116 | { 117 | (SearchField field, SearchClient searchClient) = await InitializeFieldAndSearchClientAsync(endpoint, adminKey, indexName, fieldName); 118 | 119 | object lowerBound = await Bound.FindLowerBoundAsync(field, searchClient); 120 | Console.WriteLine($"Lower Bound {Bound.SerializeBound(lowerBound)}"); 121 | 122 | object upperBound = await Bound.FindUpperBoundAsync(field, searchClient); 123 | Console.WriteLine($"Upper Bound {Bound.SerializeBound(upperBound)}"); 124 | }, endpointOption, adminKeyOption, indexOption, fieldOption); 125 | 126 | var partitionCommand = new Command("partition-index", "Partitions the data in the index between the upper and lower bound values into partitions with at most 100,000 documents.") 127 | { 128 | endpointOption, 129 | adminKeyOption, 130 | indexOption, 131 | fieldOption, 132 | lowerBoundOption, 133 | upperBoundOption, 134 | partitionSizeOption, 135 | partitionFileOption 136 | }; 137 | partitionCommand.SetHandler(async (string endpoint, string adminKey, string indexName, string fieldName, string inputLowerBound, string inputUpperBound, int partitionSize, string partitionFilePath) => 138 | { 139 | if (partitionSize < 0 || partitionSize > MaximumPartitionSize) 140 | { 141 | throw new ArgumentException($"Partition size {partitionSize} must be between 0 and {MaximumPartitionSize}"); 142 | } 143 | 144 | if (string.IsNullOrEmpty(partitionFilePath)) 145 | { 146 | partitionFilePath = $"{indexName}-partitions.json"; 147 | } 148 | 149 | (SearchField field, SearchClient searchClient) = await InitializeFieldAndSearchClientAsync(endpoint, adminKey, indexName, fieldName); 150 | object lowerBound; 151 | if (string.IsNullOrEmpty(inputLowerBound)) 152 | { 153 | lowerBound = await Bound.FindLowerBoundAsync(field, searchClient); 154 | } 155 | else 156 | { 157 | lowerBound = Bound.DeserializeBound(field.Type, inputLowerBound); 158 | } 159 | 160 | object upperBound; 161 | if (string.IsNullOrEmpty(inputUpperBound)) 162 | { 163 | upperBound = await Bound.FindUpperBoundAsync(field, searchClient); 164 | } 165 | else 166 | { 167 | upperBound = Bound.DeserializeBound(field.Type, inputUpperBound); 168 | } 169 | 170 | 171 | List partitions = await new PartitionGenerator(searchClient, field, lowerBound, upperBound, partitionSize).GeneratePartitions(); 172 | var output = new PartitionFile 173 | { 174 | Endpoint = endpoint, 175 | IndexName = indexName, 176 | FieldName = fieldName, 177 | TotalDocumentCount = partitions.Sum(partition => partition.DocumentCount), 178 | Partitions = partitions 179 | }; 180 | output.SerializeToFile(partitionFilePath); 181 | Console.WriteLine($"Wrote partitions to {partitionFilePath}"); 182 | }, endpointOption, adminKeyOption, indexOption, fieldOption, lowerBoundOption, upperBoundOption, partitionSizeOption, partitionFileOption); 183 | 184 | var exportPartitionsCommand = new Command(name: "export-partitions", description: "Exports data from a search index using a pre-generated partition file from partition-index") 185 | { 186 | partitionFileRequiredOption, 187 | adminKeyOption, 188 | exportDirectoryOption, 189 | concurrentPartitionsOption, 190 | pageSizeOption, 191 | includePartitionsOption, 192 | excludePartitionsOption, 193 | includeFieldsOption, 194 | excludeFieldsOption 195 | }; 196 | 
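// Note: unlike the other commands, this handler reads option values through the
// InvocationContext rather than strongly typed SetHandler parameters. The generic
// SetHandler overloads in the System.CommandLine beta bind a limited number of
// symbols (eight), and export-partitions takes nine options.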
exportPartitionsCommand.SetHandler(async (invocationContext) => 197 | { 198 | string partitionFilePath = invocationContext.ParseResult.GetValueForOption(partitionFileRequiredOption); 199 | string adminKey = invocationContext.ParseResult.GetValueForOption(adminKeyOption); 200 | string exportDirectory = invocationContext.ParseResult.GetValueForOption(exportDirectoryOption); 201 | int concurrentPartitions = invocationContext.ParseResult.GetValueForOption(concurrentPartitionsOption); 202 | int pageSize = invocationContext.ParseResult.GetValueForOption(pageSizeOption); 203 | int[] partitionsToInclude = invocationContext.ParseResult.GetValueForOption(includePartitionsOption); 204 | int[] partitionsToExclude = invocationContext.ParseResult.GetValueForOption(excludePartitionsOption); 205 | string[] fieldsToInclude = invocationContext.ParseResult.GetValueForOption(includeFieldsOption); 206 | string[] fieldsToExclude = invocationContext.ParseResult.GetValueForOption(excludeFieldsOption); 207 | 208 | if (partitionsToExclude.Any() && partitionsToInclude.Any()) 209 | { 210 | throw new ArgumentException("Only pass either --include-partition or --exclude-partition, not both"); 211 | } 212 | 213 | using FileStream input = File.OpenRead(partitionFilePath); 214 | var partitionFile = JsonSerializer.Deserialize(input, options: Util.SerializerOptions); 215 | SearchClient searchClient = InitializeSearchClient(partitionFile.Endpoint, adminKey, partitionFile.IndexName); 216 | SearchIndexClient searchIndexClient = InitializeSearchIndexClient(partitionFile.Endpoint, adminKey); 217 | SearchIndex index = await searchIndexClient.GetIndexAsync(partitionFile.IndexName); 218 | var partitionWriter = new FilePartitionWriter(exportDirectory, index.Name); 219 | await new PartitionExporter( 220 | partitionFile, 221 | partitionWriter, 222 | searchClient, 223 | index, 224 | concurrentPartitions, 225 | pageSize, 226 | partitionsToInclude, 227 | partitionsToExclude.ToHashSet(), 228 | fieldsToInclude, 229 | fieldsToExclude.ToHashSet()) 230 | .ExportAsync(); 231 | }); 232 | 233 | var exportContinuousCommand = new Command(name: "export-continuous", description: "Exports data from a search service by adding a column to track which documents have been exported and continually updating it") 234 | { 235 | endpointOption, 236 | adminKeyOption, 237 | indexOption, 238 | exportFieldOption, 239 | pageSizeOption, 240 | exportFileOption, 241 | includeFieldsOption, 242 | excludeFieldsOption 243 | }; 244 | exportContinuousCommand.SetHandler(async (endpoint, adminKey, indexName, exportField, pageSize, exportFilePath, fieldsToInclude, fieldsToExclude) => 245 | { 246 | if (fieldsToInclude.Any() && fieldsToExclude.Any()) 247 | { 248 | throw new ArgumentException("Only pass either --include-field or --exclude-field, not both"); 249 | } 250 | 251 | if (string.IsNullOrEmpty(exportFilePath)) 252 | { 253 | exportFilePath = $"{indexName}-documents.json"; 254 | } 255 | 256 | SearchClient searchClient = InitializeSearchClient(endpoint, adminKey, indexName); 257 | SearchIndexClient searchIndexClient = InitializeSearchIndexClient(endpoint, adminKey); 258 | SearchIndex index = await searchIndexClient.GetIndexAsync(indexName); 259 | 260 | await new ContinuousExporter(searchClient, index, searchIndexClient, exportField, pageSize, exportFilePath, fieldsToInclude, fieldsToExclude.ToHashSet()).ExportAsync(); 261 | 262 | }, endpointOption, adminKeyOption, indexOption, exportFieldOption, pageSizeOption, exportFileOption, includeFieldsOption, excludeFieldsOption); 263 
| 264 | var rootCommand = new RootCommand(description: "Export data from a search index. Requires a filterable and sortable field.") 265 | { 266 | boundsCommand, 267 | partitionCommand, 268 | exportPartitionsCommand, 269 | exportContinuousCommand 270 | }; 271 | await rootCommand.InvokeAsync(args); 272 | } 273 | 274 | public static async Task<(SearchField field, SearchClient searchClient)> InitializeFieldAndSearchClientAsync(string endpoint, string adminKey, string indexName, string fieldName) 275 | { 276 | var endpointUri = new Uri(endpoint); 277 | SearchClient searchClient; 278 | SearchIndexClient searchIndexClient; 279 | if (!string.IsNullOrEmpty(adminKey)) 280 | { 281 | var credential = new AzureKeyCredential(adminKey); 282 | searchClient = new SearchClient(endpointUri, indexName, credential); 283 | searchIndexClient = new SearchIndexClient(endpointUri, credential); 284 | } 285 | else 286 | { 287 | var credential = new DefaultAzureCredential(); 288 | searchClient = new SearchClient(endpointUri, indexName, credential); 289 | searchIndexClient = new SearchIndexClient(endpointUri, credential); 290 | } 291 | SearchField field = await GetFieldAsync(searchIndexClient, indexName, fieldName); 292 | return (field, searchClient); 293 | } 294 | 295 | public static SearchClient InitializeSearchClient(string endpoint, string key, string indexName) 296 | { 297 | var endpointUri = new Uri(endpoint); 298 | if (!string.IsNullOrEmpty(key)) 299 | { 300 | return new SearchClient(endpointUri, indexName, new AzureKeyCredential(key)); 301 | } 302 | 303 | return new SearchClient(endpointUri, indexName, new DefaultAzureCredential()); 304 | } 305 | 306 | public static SearchIndexClient InitializeSearchIndexClient(string endpoint, string key) 307 | { 308 | var endpointUri = new Uri(endpoint); 309 | if (!string.IsNullOrEmpty(key)) 310 | { 311 | return new SearchIndexClient(endpointUri, new AzureKeyCredential(key)); 312 | } 313 | 314 | return new SearchIndexClient(endpointUri, new DefaultAzureCredential()); 315 | } 316 | 317 | // Fetch the index definition and validate that the field meets the sortable and filterable requirements 318 | public static async Task GetFieldAsync(SearchIndexClient searchIndexClient, string indexName, string fieldName) 319 | { 320 | SearchIndex index = await searchIndexClient.GetIndexAsync(indexName); 321 | SearchField field = index.Fields.FirstOrDefault(field => field.Name == fieldName); 322 | 323 | if (field == null) 324 | { 325 | throw new ArgumentException($"Could not find {fieldName} in {indexName}", nameof(fieldName)); 326 | } 327 | if (!(field.IsSortable ?? false) || !(field.IsFilterable ?? 
false)) 328 | { 329 | throw new ArgumentException($"{fieldName} must be sortable and filterable", nameof(fieldName)); 330 | } 331 | if (!SupportedFieldTypes.Contains(field.Type)) 332 | { 333 | string supportedFieldTypesList = string.Join(", ", SupportedFieldTypes.Select(type => type.ToString())); 334 | throw new ArgumentException($"{fieldName} is of type {field.Type}, supported types {supportedFieldTypesList}", nameof(fieldName)); 335 | } 336 | 337 | return field; 338 | } 339 | } 340 | } -------------------------------------------------------------------------------- /export-data/export-data/Util.cs: -------------------------------------------------------------------------------- 1 | using System.Text.Encodings.Web; 2 | using System.Text.Json; 3 | 4 | namespace export_data 5 | { 6 | public static class Util 7 | { 8 | public static readonly JsonSerializerOptions SerializerOptions = new JsonSerializerOptions 9 | { 10 | PropertyNamingPolicy = JsonNamingPolicy.CamelCase, 11 | WriteIndented = true, 12 | // Required to put non-ASCII characters in JSON files. To learn more, please visit https://learn.microsoft.com/dotnet/api/system.text.encodings.web.javascriptencoder.unsaferelaxedjsonescaping 13 | Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping 14 | }; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /export-data/export-data/export-data.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | net6.0 6 | export_data 7 | enable 8 | false 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /export-data/tests/MockPartitionWriter.cs: -------------------------------------------------------------------------------- 1 | using Azure; 2 | using Azure.Search.Documents.Models; 3 | using export_data; 4 | using System; 5 | using System.Collections.Concurrent; 6 | using System.Collections.Generic; 7 | using System.Linq; 8 | using System.Text; 9 | using System.Threading.Tasks; 10 | 11 | namespace tests 12 | { 13 | public class MockPartitionWriter : IPartitionWriter 14 | { 15 | private readonly string _key; 16 | private readonly ConcurrentDictionary<int, Dictionary<string, SearchDocument>> _exportedPartitions = new(); 17 | 18 | public MockPartitionWriter(string key) 19 | { 20 | _key = key; 21 | } 22 | 23 | public async Task WritePartitionAsync(int partitionId, SearchResults<SearchDocument> searchResults, CancellationToken cancellationToken, int? pageSizeHint = null) 24 | { 25 | var partition = new Dictionary<string, SearchDocument>(); 26 | await foreach (Page<SearchResult<SearchDocument>> resultPage in searchResults.GetResultsAsync().AsPages(pageSizeHint: pageSizeHint)) 27 | { 28 | foreach (SearchResult<SearchDocument> searchResult in resultPage.Values) 29 | { 30 | partition[searchResult.Document[_key].ToString()] = searchResult.Document; 31 | } 32 | } 33 | 34 | _exportedPartitions[partitionId] = partition; 35 | } 36 | 37 | public IReadOnlyDictionary<int, IReadOnlyDictionary<string, SearchDocument>> GetExportedPartitions() 38 | { 39 | var results = new Dictionary<int, IReadOnlyDictionary<string, SearchDocument>>(); 40 | foreach (KeyValuePair<int, Dictionary<string, SearchDocument>> partition in _exportedPartitions) 41 | { 42 | results[partition.Key] = partition.Value; 43 | } 44 | return results; 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /export-data/tests/PartitionExporterTests.cs: -------------------------------------------------------------------------------- 1 | using Azure.Identity; 2 | using Azure.Search.Documents; 3 | using Azure.Search.Documents.Indexes; 4 | using Azure.Search.Documents.Indexes.Models; 5 | using Azure.Search.Documents.Models; 6 | using export_data; 7 | using Microsoft.Extensions.Configuration; 8 | 9 | namespace tests 10 | { 11 | [TestClass] 12 | public class PartitionExporterTests 13 | { 14 | private const string TestIndexName = "partition-exporter-test"; 15 | private const int TestDocumentCount = 1000; 16 | private const int TestPartitionSize = 125; 17 | private static SearchIndexClient SearchIndexClient { get; set; } 18 | private static SearchClient SearchClient { get; set; } 19 | private static SearchField BoundField { get; } = new SearchField(name: "timestamp", SearchFieldDataType.DateTimeOffset); 20 | private static string KeyField { get; } = "id"; 21 | private static readonly DateTimeOffset startDate = new DateTimeOffset(2022, 1, 1, 0, 0, 0, TimeSpan.Zero); 22 | 23 | [ClassInitialize] 24 | public static void ClassInitialize(TestContext _) 25 | { 26 | IConfigurationRoot configuration = new ConfigurationBuilder() 27 | .AddJsonFile("config.json") 28 | .Build(); 29 | var credential = new DefaultAzureCredential(); 30 | SearchIndexClient = new SearchIndexClient(new Uri(configuration["searchEndpoint"]), credential); 31 | SearchIndexClient.CreateOrUpdateIndex(GetTestIndexDefinition()); 32 | SearchClient = new SearchClient(SearchIndexClient.Endpoint, TestIndexName, credential); 33 | foreach (IEnumerable<SearchDocument> batch in SetupTestData()) 34 | { 35 | SearchClient.UploadDocuments(batch); 36 | } 37 | 38 | // Wait for updates to propagate 39 | Task.Delay(TimeSpan.FromSeconds(3)).Wait(); 40 | } 41 | 42 | [ClassCleanup] 43 | public static void ClassCleanup() 44 | { 45 | SearchIndexClient.DeleteIndex(TestIndexName); 46 | } 47 |
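// Expected layout of the test data: 1,000 documents with timestamps one day apart and a
// 125-document partition maximum split evenly into 8 partitions of 125 documents each,
// ordered by document id (see the assertions below).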
48 | [TestMethod] 49 | public async Task TestPartitionExporter() 50 | { 51 | var lowerBound = await Bound.FindLowerBoundAsync(BoundField, SearchClient); 52 | var upperBound = await Bound.FindUpperBoundAsync(BoundField, SearchClient); 53 | 54 | List<Partition> partitions = await new PartitionGenerator(SearchClient, BoundField, lowerBound, upperBound, partitionMaximumDocumentCount: TestPartitionSize).GeneratePartitions(); 55 | var partitionFile = new PartitionFile 56 | { 57 | Endpoint = SearchIndexClient.Endpoint.AbsoluteUri, 58 | IndexName = TestIndexName, 59 | FieldName = BoundField.Name, 60 | TotalDocumentCount = partitions.Sum(partition => partition.DocumentCount), 61 | Partitions = partitions 62 | }; 63 | 64 | var mockPartitionWriter = new MockPartitionWriter(KeyField); 65 | var partitionExporter = new PartitionExporter( 66 | partitionFile, 67 | mockPartitionWriter, 68 | SearchClient, 69 | GetTestIndexDefinition(), 70 | concurrentPartitions: 2, 71 | pageSize: 1000, 72 | partitionIdsToInclude: Enumerable.Range(0, partitions.Count).ToArray(), 73 | partitionIdsToExclude: null, 74 | fieldsToInclude: null, 75 | fieldsToExclude: null); 76 | 77 | await partitionExporter.ExportAsync(); 78 | 79 | IReadOnlyDictionary<int, IReadOnlyDictionary<string, SearchDocument>> exportedPartitions = mockPartitionWriter.GetExportedPartitions(); 80 | Assert.AreEqual(8, exportedPartitions.Count, $"Got {exportedPartitions.Count} partitions, expected 8"); 81 | for (int i = 0; i < 8; i++) 82 | { 83 | IReadOnlyDictionary<string, SearchDocument> partition = exportedPartitions[i]; 84 | Assert.AreEqual(125, partition.Count, $"Unexpected partition length {partition.Count} for partition {i}"); 85 | 86 | for (int j = 0; j < partition.Count; j++) 87 | { 88 | int expectedId = (i * 125) + j; 89 | SearchDocument partitionedDocument = partition.GetValueOrDefault(expectedId.ToString()); 90 | Assert.IsNotNull(partitionedDocument, $"Missing document {expectedId} in partition {i}"); 91 | string actualTimestamp = partitionedDocument["timestamp"].ToString(); 92 | string expectedTimestamp = DateTimeOffsetForDocument(expectedId).ToString("yyyy-MM-ddTHH:mm:ssZ"); 93 | Assert.AreEqual(expectedTimestamp, actualTimestamp); 94 | } 95 | } 96 | } 97 | 98 | private static SearchIndex GetTestIndexDefinition() => 99 | new SearchIndex(TestIndexName) 100 | { 101 | Fields = 102 | { 103 | new SearchField(name: "id", SearchFieldDataType.String) { IsKey = true }, 104 | new SearchField(name: "timestamp", SearchFieldDataType.DateTimeOffset) 105 | } 106 | }; 107 | 108 | private static IEnumerable<IEnumerable<SearchDocument>> SetupTestData() 109 | { 110 | var batch = new List<SearchDocument>(); 111 | for (int i = 0; i < TestDocumentCount; i++) 112 | { 113 | var timestamp = DateTimeOffsetForDocument(i); 114 | batch.Add(new SearchDocument(new Dictionary<string, object> 115 | { 116 | ["id"] = i.ToString(), 117 | ["timestamp"] = timestamp 118 | })); 119 | if (batch.Count >= 1000) 120 | { 121 | yield return batch; 122 | batch = new List<SearchDocument>(); 123 | } 124 | } 125 | 126 | if (batch.Count > 0) 127 | { 128 | yield return batch; 129 | } 130 | } 131 | 132 | // Document's timestamp is the start time + its id so partitions are ordered by document id 133 | private static DateTimeOffset DateTimeOffsetForDocument(int documentId) => 134 | startDate.AddDays(documentId); 135 | } 136 | } -------------------------------------------------------------------------------- /export-data/tests/Usings.cs: -------------------------------------------------------------------------------- 1 | global using Microsoft.VisualStudio.TestTools.UnitTesting; -------------------------------------------------------------------------------- /export-data/tests/config.example.json: -------------------------------------------------------------------------------- 1 | { 2 | "searchEndpoint": "https://your-search-service.search.windows.net" 3 | } 4 | -------------------------------------------------------------------------------- /export-data/tests/tests.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | net6.0 5 | enable 6 | disable 7 | 8 | false 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | Always 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /index-backup-restore/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | *.sbr 86 | *.tlb 87 | *.tli 88 | *.tlh 89 | *.tmp 90 | *.tmp_proj 91 | *_wpftmp.csproj 92 | *.log 93 | *.tlog 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but 
database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
298 | *.vbp 299 | 300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 301 | *.dsw 302 | *.dsp 303 | 304 | # Visual Studio 6 technical files 305 | *.ncb 306 | *.aps 307 | 308 | # Visual Studio LightSwitch build output 309 | **/*.HTMLClient/GeneratedArtifacts 310 | **/*.DesktopClient/GeneratedArtifacts 311 | **/*.DesktopClient/ModelManifest.xml 312 | **/*.Server/GeneratedArtifacts 313 | **/*.Server/ModelManifest.xml 314 | _Pvt_Extensions 315 | 316 | # Paket dependency manager 317 | .paket/paket.exe 318 | paket-files/ 319 | 320 | # FAKE - F# Make 321 | .fake/ 322 | 323 | # CodeRush personal settings 324 | .cr/personal 325 | 326 | # Python Tools for Visual Studio (PTVS) 327 | __pycache__/ 328 | *.pyc 329 | 330 | # Cake - Uncomment if you are using it 331 | # tools/** 332 | # !tools/packages.config 333 | 334 | # Tabs Studio 335 | *.tss 336 | 337 | # Telerik's JustMock configuration file 338 | *.jmconfig 339 | 340 | # BizTalk build output 341 | *.btp.cs 342 | *.btm.cs 343 | *.odx.cs 344 | *.xsd.cs 345 | 346 | # OpenCover UI analysis results 347 | OpenCover/ 348 | 349 | # Azure Stream Analytics local run output 350 | ASALocalRun/ 351 | 352 | # MSBuild Binary and Structured Log 353 | *.binlog 354 | 355 | # NVidia Nsight GPU debugger configuration file 356 | *.nvuser 357 | 358 | # MFractors (Xamarin productivity tool) working folder 359 | .mfractor/ 360 | 361 | # Local History for Visual Studio 362 | .localhistory/ 363 | 364 | # Visual Studio History (VSHistory) files 365 | .vshistory/ 366 | 367 | # BeatPulse healthcheck temp database 368 | healthchecksdb 369 | 370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 371 | MigrationBackup/ 372 | 373 | # Ionide (cross platform F# VS Code tools) working folder 374 | .ionide/ 375 | 376 | # Fody - auto-generated XML schema 377 | FodyWeavers.xsd 378 | 379 | # VS Code files for those working on multiple tools 380 | .vscode/* 381 | !.vscode/settings.json 382 | !.vscode/tasks.json 383 | !.vscode/launch.json 384 | !.vscode/extensions.json 385 | *.code-workspace 386 | 387 | # Local History for Visual Studio Code 388 | .history/ 389 | 390 | # Windows Installer files from build outputs 391 | *.cab 392 | *.msi 393 | *.msix 394 | *.msm 395 | *.msp 396 | 397 | # JetBrains Rider 398 | *.sln.iml -------------------------------------------------------------------------------- /index-backup-restore/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | page_type: sample 3 | languages: 4 | - csharp 5 | name: Back up and restore an Azure AI Search index 6 | description: "This application backs up a 'source' index schema and its documents to JSON files on your computer, and then uses those files to recreate a 'target' index copy in the 'target' search service that you specify. Depending on your needs, you can use all or part of this application to backup your index files and/or move an index from one search service to another." 7 | products: 8 | - azure 9 | - azure-cognitive-search 10 | urlFragment: azure-search-backup-restore-index 11 | --- 12 | 13 | # Back up and restore an Azure AI Search index 14 | 15 | ![MIT license badge](https://img.shields.io/badge/license-MIT-green.svg) 16 | 17 | **This unofficial code sample is offered \"as-is\" and might not work for all customers and scenarios. 
If you run into difficulties, you should manually recreate and reload your search index on a new search service.** 18 | 19 | This application copies an index from one service to another, creating JSON files on your computer with the index schema and documents. This tool is useful if you've been using the Free pricing tier to develop your index and want to move to the Basic or higher tier for production use. It's also useful if you want to back up your index to your computer and restore the index at a later time. 20 | 21 | > **Note**: Azure AI Search now supports [service upgrades](https://learn.microsoft.com/azure/search/search-how-to-upgrade) and [pricing tier changes](https://learn.microsoft.com/azure/search/search-capacity-planning#change-your-pricing-tier). If you're backing up and restoring your index for migration to a higher capacity service, you now have other options. 22 | 23 | ## IMPORTANT - PLEASE READ 24 | 25 | Search indexes are different from other datastores because they are constantly ranking and scoring results, so data may shift. If you page through search results, or even use continuation tokens as this tool does, it is possible to miss some data during extraction. 26 | 27 | As an example, assume that a document with ID 101 is part of page 5 of the search results. As you extract data page by page and move from page 4 to page 5, it is possible that ID 101 is now actually part of page 4. When you look at page 5, it is no longer there, and you have missed that document. For this reason, it is best if no changes are being made to the search index while you run this tool. 28 | 29 | As a safeguard, this tool compares the number of documents in the original index and the index copy. If the numbers don't match, the copy may be missing data. Although this check is not a perfect solution, it does help prevent you from missing data. 30 | 31 | **If your index has more than 100,000 documents**, this sample, as written, will not work. This is because the REST API $skip feature, which is used for paging, has a 100K document limit. However, you can work around this limitation by adding code that iterates over, and filters on, a facet with fewer than 100K documents per facet value, as sketched below. 32 |
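One possible shape for that workaround is sketched below, using the v11 `Azure.Search.Documents` client for brevity. The `category` field name and the `searchClient` variable are illustrative assumptions rather than part of this sample; the facet field must be filterable and facetable, and each facet value must cover fewer than 100K documents:

```csharp
// Enumerate facet values, then page through each filtered subset so $skip stays under 100K.
var facetOptions = new SearchOptions { Size = 0 };
facetOptions.Facets.Add("category,count:1000");
SearchResults<SearchDocument> facetResults = await searchClient.SearchAsync<SearchDocument>("*", facetOptions);

foreach (FacetResult facet in facetResults.Facets["category"])
{
    string facetValue = facet.AsValueFacetResult<string>().Value;
    const int pageSize = 500;
    int skip = 0;
    while (skip < 100000)
    {
        var pageOptions = new SearchOptions
        {
            Filter = $"category eq '{facetValue}'",
            Size = pageSize,
            Skip = skip
        };
        SearchResults<SearchDocument> page = await searchClient.SearchAsync<SearchDocument>("*", pageOptions);
        int returned = 0;
        await foreach (SearchResult<SearchDocument> result in page.GetResultsAsync())
        {
            returned++;
            // ... append result.Document to the backup JSON file ...
        }
        if (returned < pageSize) break; // last page for this facet value
        skip += pageSize;
    }
}
```

Because every query is filtered to a single facet value, paging never has to skip past the 100K ceiling, at the cost of one extra faceting query and some additional request traffic.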
33 | ## Prerequisites 34 | 35 | - [Visual Studio](https://visualstudio.microsoft.com/downloads/) 36 | - [Azure AI Search service](https://docs.microsoft.com/azure/search/search-create-service-portal) 37 | 38 | ## Setup 39 | 40 | 1. Clone or download this sample repository. 41 | 1. Extract contents if the download is a zip file. Make sure the files are read-write. 42 | 43 | This sample is available in two versions: 44 | 45 | 1. **v11** (recommended): uses the newer [Azure.Search.Documents](https://docs.microsoft.com/dotnet/api/overview/azure/search.documents-readme) client library. This is the library recommended for use on all new projects. 46 | 2. **v10**: uses the deprecated [Microsoft.Azure.Search](https://learn.microsoft.com/dotnet/api/microsoft.azure.search) client libraries. 47 | 48 | ## Run the sample 49 | 50 | > [!NOTE] 51 | > In this application, the term "source" identifies the search service and index that you are backing up. The term "target" identifies the search service and index that will contain the restored (copied) index. 52 | 53 | 1. Open the AzureSearchBackupRestoreIndex.sln project in Visual Studio. 54 | 55 | 1. By default, this application will copy the source index to the target search service using the target index name you provide. 56 | - If you only want to back up the index and not restore it immediately, do this: 57 | - Comment out the code in the **Main** method after the **BackupIndexAndDocuments** method call. 58 | - Comment out the last two lines of the **ConfigurationSetup** method that set the _TargetSearchClient_ and _TargetIndexClient_. 59 | - If you want to restore an index that you previously backed up, do this: 60 | - Make sure that the _BackupDirectory_ in the appsettings.json file is pointing to the backup location. 61 | - Comment out the **BackupIndexAndDocuments** method call and the line that checks the _targetCount_ in the **Main** method. 62 | - Comment out the lines in the **ConfigurationSetup** method that set the _SourceSearchClient_ and _SourceIndexClient_. 63 | 64 | 1. Open the appsettings.json and replace the placeholder strings with all applicable values (a filled-in example appears at the end of this README): 65 | 66 | - The source search service name (SourceSearchServiceName) and key (SourceAdminKey), and the name of the index that you want to restore/copy. 67 | - The target search service name (TargetSearchServiceName) and key (TargetAdminKey), and the name of the restored/copied index in the target service. 68 | - The location on your computer where you want to store the backup index schema and documents (BackupDirectory). The location must have non-admin write permission. Include escape characters in directory paths. Examples: 69 | - Windows: `C:\\users\\indexBackup\\` 70 | - MacOS: `/Users//indexBackup` 71 | - Relative to the `.csproj` file location: `index-backup` 72 | 73 | 1. If necessary, update the _APIVersionString_ value in `AzureSearchHelper.cs`. 74 | 75 | 1. Compile and run the project. 76 | 77 | ## Next steps 78 | 79 | You can learn more about Azure AI Search on the [official documentation site](https://docs.microsoft.com/azure/search).
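For reference, a filled-in `appsettings.json` might look like the following (every name, key, and path below is a placeholder):

```json
{
  "SourceSearchServiceName": "my-source-service",
  "SourceAdminKey": "<source-admin-key>",
  "SourceIndexName": "hotels-sample-index",
  "TargetSearchServiceName": "my-target-service",
  "TargetAdminKey": "<target-admin-key>",
  "TargetIndexName": "hotels-sample-index",
  "BackupDirectory": "C:\\users\\indexBackup\\"
}
```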
80 | -------------------------------------------------------------------------------- /index-backup-restore/v10/AzureSearchBackupRestoreIndex.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.28803.352 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AzureSearchBackupRestoreIndex", "AzureSearchBackupRestoreIndex\AzureSearchBackupRestoreIndex.csproj", "{3BA854A7-9DF4-4BE1-9749-E51A49AE2E16}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {3BA854A7-9DF4-4BE1-9749-E51A49AE2E16}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {3BA854A7-9DF4-4BE1-9749-E51A49AE2E16}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {3BA854A7-9DF4-4BE1-9749-E51A49AE2E16}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {3BA854A7-9DF4-4BE1-9749-E51A49AE2E16}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {CB63787D-86CA-48E4-849F-371290479660} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /index-backup-restore/v10/AzureSearchBackupRestoreIndex/AzureSearchBackupRestoreIndex.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net8.0 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | PreserveNewest 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /index-backup-restore/v10/AzureSearchBackupRestoreIndex/AzureSearchHelper.cs: -------------------------------------------------------------------------------- 1 | //Copyright 2019 Microsoft 2 | 3 | //Licensed under the Apache License, Version 2.0 (the "License"); 4 | //you may not use this file except in compliance with the License. 5 | //You may obtain a copy of the License at 6 | 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | //Unless required by applicable law or agreed to in writing, software 10 | //distributed under the License is distributed on an "AS IS" BASIS, 11 | //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | //See the License for the specific language governing permissions and 13 | //limitations under the License. 
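// Helper for calling the Azure AI Search REST API directly: it appends the api-version
// query string parameter to every request and serializes request/response payloads
// using camelCase Newtonsoft.Json settings.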
14 | 15 | using System; 16 | using System.Net.Http; 17 | using System.Text; 18 | using Newtonsoft.Json; 19 | using Newtonsoft.Json.Converters; 20 | using Newtonsoft.Json.Serialization; 21 | 22 | namespace AzureSearchBackupRestore 23 | { 24 | public class AzureSearchHelper 25 | { 26 | // Available versions: https://learn.microsoft.com/en-us/rest/api/searchservice/search-service-api-versions 27 | public const string ApiVersionString = "api-version=2024-03-01-Preview"; 28 | 29 | private static readonly JsonSerializerSettings _jsonSettings; 30 | 31 | static AzureSearchHelper() 32 | { 33 | _jsonSettings = new JsonSerializerSettings 34 | { 35 | Formatting = Formatting.Indented, // for readability, change to None for compactness 36 | ContractResolver = new CamelCasePropertyNamesContractResolver(), 37 | DateTimeZoneHandling = DateTimeZoneHandling.Utc 38 | }; 39 | 40 | _jsonSettings.Converters.Add(new StringEnumConverter()); 41 | } 42 | 43 | public static string SerializeJson(object value) 44 | { 45 | return JsonConvert.SerializeObject(value, _jsonSettings); 46 | } 47 | 48 | public static T DeserializeJson<T>(string json) 49 | { 50 | return JsonConvert.DeserializeObject<T>(json, _jsonSettings); 51 | } 52 | 53 | public static HttpResponseMessage SendSearchRequest(HttpClient client, HttpMethod method, Uri uri, string json = null) 54 | { 55 | UriBuilder builder = new UriBuilder(uri); 56 | string separator = string.IsNullOrWhiteSpace(builder.Query) ? string.Empty : "&"; 57 | builder.Query = builder.Query.TrimStart('?') + separator + ApiVersionString; 58 | 59 | var request = new HttpRequestMessage(method, builder.Uri); 60 | 61 | if (json != null) 62 | { 63 | request.Content = new StringContent(json, Encoding.UTF8, "application/json"); 64 | } 65 | 66 | return client.SendAsync(request).Result; 67 | } 68 | 69 | public static void EnsureSuccessfulSearchResponse(HttpResponseMessage response) 70 | { 71 | if (!response.IsSuccessStatusCode) 72 | { 73 | string error = response.Content == null ?
null : response.Content.ReadAsStringAsync().Result; 74 | throw new Exception("Search request failed: " + error); 75 | } 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /index-backup-restore/v10/AzureSearchBackupRestoreIndex/Program.cs: -------------------------------------------------------------------------------- 1 | // This is a prototype tool that allows for extraction of data from a search index 2 | // Since this tool is still under development, it should not be used for production usage 3 | 4 | using Microsoft.Azure.Search; 5 | using Microsoft.Azure.Search.Models; 6 | using Microsoft.Extensions.Configuration; 7 | using Newtonsoft.Json; 8 | using System; 9 | using System.Collections.Generic; 10 | using System.IO; 11 | using System.Net.Http; 12 | using System.Threading; 13 | using System.Threading.Tasks; 14 | 15 | 16 | namespace AzureSearchBackupRestore 17 | { 18 | class Program 19 | { 20 | 21 | private static string SourceSearchServiceName; 22 | private static string SourceAdminKey; 23 | private static string SourceIndexName; 24 | private static string TargetSearchServiceName; 25 | private static string TargetAdminKey; 26 | private static string TargetIndexName; 27 | private static string BackupDirectory; 28 | 29 | private static SearchServiceClient SourceSearchClient; 30 | private static ISearchIndexClient SourceIndexClient; 31 | private static SearchServiceClient TargetSearchClient; 32 | private static ISearchIndexClient TargetIndexClient; 33 | 34 | private static int MaxBatchSize = 500; // JSON files will contain this many documents / file and can be up to 1000 35 | private static int ParallelizedJobs = 10; // Output content in parallel jobs 36 | 37 | static void Main(string[] args) 38 | { 39 | 40 | //Get source and target search service info and index names from appsettings.json file 41 | //Set up source and target search service clients 42 | ConfigurationSetup(); 43 | 44 | //Backup the source index 45 | Console.WriteLine("\nSTART INDEX BACKUP"); 46 | BackupIndexAndDocuments(); 47 | 48 | //Recreate and import content to target index 49 | Console.WriteLine("\nSTART INDEX RESTORE"); 50 | DeleteIndex(); 51 | CreateTargetIndex(); 52 | ImportFromJSON(); 53 | Console.WriteLine("\n Waiting 10 seconds for target to index content..."); 54 | Console.WriteLine(" NOTE: For really large indexes it may take longer to index all content.\n"); 55 | Thread.Sleep(10000); 56 | 57 | // Validate all content is in target index 58 | int sourceCount = GetCurrentDocCount(SourceIndexClient); 59 | int targetCount = GetCurrentDocCount(TargetIndexClient); 60 | Console.WriteLine("\nSAFEGUARD CHECK: Source and target index counts should match"); 61 | Console.WriteLine(" Source index contains {0} docs", sourceCount); 62 | Console.WriteLine(" Target index contains {0} docs\n", targetCount); 63 | 64 | Console.WriteLine("Press any key to continue..."); 65 | Console.ReadLine(); 66 | } 67 | 68 | static void ConfigurationSetup() 69 | { 70 | 71 | IConfigurationBuilder builder = new ConfigurationBuilder().AddJsonFile("appsettings.json"); 72 | IConfigurationRoot configuration = builder.Build(); 73 | 74 | SourceSearchServiceName = configuration["SourceSearchServiceName"]; 75 | SourceAdminKey = configuration["SourceAdminKey"]; 76 | SourceIndexName = configuration["SourceIndexName"]; 77 | TargetSearchServiceName = configuration["TargetSearchServiceName"]; 78 | TargetAdminKey = configuration["TargetAdminKey"]; 79 | TargetIndexName = configuration["TargetIndexName"]; 
80 | BackupDirectory = configuration["BackupDirectory"]; 81 | 82 | Console.WriteLine("CONFIGURATION:"); 83 | Console.WriteLine("\n Source service and index {0}, {1}", SourceSearchServiceName, SourceIndexName); 84 | Console.WriteLine("\n Target service and index: {0}, {1}", TargetSearchServiceName, TargetIndexName); 85 | Console.WriteLine("\n Backup directory: " + BackupDirectory); 86 | 87 | Console.WriteLine("\nDoes this look correct? Press any key to continue, Ctrl+C to cancel."); 88 | Console.ReadLine(); 89 | 90 | SourceSearchClient = new SearchServiceClient(SourceSearchServiceName, new SearchCredentials(SourceAdminKey)); 91 | SourceIndexClient = SourceSearchClient.Indexes.GetClient(SourceIndexName); 92 | 93 | 94 | TargetSearchClient = new SearchServiceClient(TargetSearchServiceName, new SearchCredentials(TargetAdminKey)); 95 | TargetIndexClient = TargetSearchClient.Indexes.GetClient(TargetIndexName); 96 | 97 | } 98 | static void BackupIndexAndDocuments() 99 | { 100 | // Backup the index schema to the specified backup directory 101 | Console.WriteLine("\n Backing up source index schema to {0}\n", Path.Combine(BackupDirectory, SourceIndexName + ".schema")); 102 | 103 | File.WriteAllText(Path.Combine(BackupDirectory, SourceIndexName + ".schema"), GetIndexSchema()); 104 | 105 | // Extract the content to JSON files 106 | int SourceDocCount = GetCurrentDocCount(SourceIndexClient); 107 | WriteIndexDocuments(SourceDocCount); // Output content from index to json files 108 | 109 | } 110 | 111 | static void WriteIndexDocuments(int CurrentDocCount) 112 | { 113 | // Write document files in batches (per MaxBatchSize) in parallel 114 | string IDFieldName = GetIDFieldName(); 115 | int FileCounter = 0; 116 | for (int batch = 0; batch <= (CurrentDocCount / MaxBatchSize); batch += ParallelizedJobs) 117 | { 118 | 119 | List tasks = new List(); 120 | for (int job = 0; job < ParallelizedJobs; job++) 121 | { 122 | FileCounter++; 123 | int fileCounter = FileCounter; 124 | if ((fileCounter - 1) * MaxBatchSize < CurrentDocCount) 125 | { 126 | Console.WriteLine(" Backing up source documents to {0} - (batch size = {1})", Path.Combine(BackupDirectory, SourceIndexName + fileCounter + ".json"), MaxBatchSize); 127 | 128 | tasks.Add(Task.Factory.StartNew(() => 129 | ExportToJSON((fileCounter - 1) * MaxBatchSize, IDFieldName, Path.Combine(BackupDirectory, SourceIndexName + fileCounter + ".json")) 130 | )); 131 | } 132 | 133 | } 134 | Task.WaitAll(tasks.ToArray()); // Wait for all the stored procs in the group to complete 135 | } 136 | 137 | return; 138 | } 139 | 140 | static void ExportToJSON(int Skip, string IDFieldName, string FileName) 141 | { 142 | // Extract all the documents from the selected index to JSON files in batches of 500 docs / file 143 | string json = string.Empty; 144 | try 145 | { 146 | SearchParameters sp = new SearchParameters() 147 | { 148 | SearchMode = SearchMode.All, 149 | Top = MaxBatchSize, 150 | Skip = Skip 151 | }; 152 | DocumentSearchResult response = SourceIndexClient.Documents.Search("*", sp); 153 | 154 | foreach (var doc in response.Results) 155 | { 156 | json += JsonConvert.SerializeObject(doc.Document) + ","; 157 | // Geospatial is formatted such that it needs to be changed for reupload 158 | // Unfortunately since it comes down in Lat, Lon format, I need to alter it to Lon, Lat for upload 159 | 160 | while (json.IndexOf("CoordinateSystem") > -1) 161 | { 162 | // At this point the data looks like this 163 | // 
{"Latitude":38.3399,"Longitude":-86.0887,"IsEmpty":false,"Z":null,"M":null,"CoordinateSystem":{"EpsgId":4326,"Id":"4326","Name":"WGS84"}} 164 | int LatStartLocation = json.IndexOf("\"Latitude\":"); 165 | LatStartLocation = json.IndexOf(":", LatStartLocation) + 1; 166 | int LatEndLocation = json.IndexOf(",", LatStartLocation); 167 | int LonStartLocation = json.IndexOf("\"Longitude\":"); 168 | LonStartLocation = json.IndexOf(":", LonStartLocation) + 1; 169 | int LonEndLocation = json.IndexOf(",", LonStartLocation); 170 | string Lat = json.Substring(LatStartLocation, LatEndLocation - LatStartLocation); 171 | string Lon = json.Substring(LonStartLocation, LonEndLocation - LonStartLocation); 172 | 173 | // Now it needs to look like this 174 | // { "type": "Point", "coordinates": [-122.131577, 47.678581] } 175 | int GeoStartPosition = json.IndexOf("\"Latitude\":") - 1; 176 | int GeoEndPosition = json.IndexOf("}}", GeoStartPosition) + 2; 177 | string updatedJson = json.Substring(0, GeoStartPosition) + "{ \"type\": \"Point\", \"coordinates\": ["; 178 | updatedJson += Lon + ", " + Lat + "] }"; 179 | updatedJson += json.Substring(GeoEndPosition); 180 | json = updatedJson; 181 | } 182 | 183 | json = json.Replace("\"Latitude\":", "\"type\": \"Point\", \"coordinates\": ["); 184 | json = json.Replace("\"Longitude\":", ""); 185 | json = json.Replace(",\"IsEmpty\":false,\"Z\":null,\"M\":null,\"CoordinateSystem\":{\"EpsgId\":4326,\"Id\":\"4326\",\"Name\":\"WGS84\"}", "]"); 186 | json += "\n"; 187 | 188 | //{ "type": "Point", "coordinates": [-122.131577, 47.678581] } 189 | //{"Latitude":41.113,"Longitude":-95.6269} 190 | //json += "\n"; 191 | 192 | } 193 | 194 | // Output the formatted content to a file 195 | json = json.Substring(0, json.Length - 3); // remove trailing comma 196 | File.WriteAllText(FileName, "{\"value\": ["); 197 | File.AppendAllText(FileName, json); 198 | File.AppendAllText(FileName, "]}"); 199 | Console.WriteLine(" Total documents: {0}", response.Results.Count.ToString()); 200 | json = string.Empty; 201 | 202 | 203 | } 204 | catch (Exception ex) 205 | { 206 | Console.WriteLine("Error: {0}", ex.Message.ToString()); 207 | } 208 | return; 209 | } 210 | 211 | static string GetIDFieldName() 212 | { 213 | // Find the id field of this index 214 | string IDFieldName = string.Empty; 215 | try 216 | { 217 | var schema = SourceSearchClient.Indexes.Get(SourceIndexName); 218 | foreach (var field in schema.Fields) 219 | { 220 | if (field.IsKey == true) 221 | { 222 | IDFieldName = Convert.ToString(field.Name); 223 | break; 224 | } 225 | } 226 | 227 | } 228 | catch (Exception ex) 229 | { 230 | Console.WriteLine("Error: {0}", ex.Message.ToString()); 231 | } 232 | return IDFieldName; 233 | } 234 | 235 | static string GetIndexSchema() 236 | { 237 | 238 | // Extract the schema for this index 239 | // We use REST here because we can take the response as-is 240 | 241 | Uri ServiceUri = new Uri("https://" + SourceSearchServiceName + ".search.windows.net"); 242 | HttpClient HttpClient = new HttpClient(); 243 | HttpClient.DefaultRequestHeaders.Add("api-key", SourceAdminKey); 244 | 245 | string Schema = string.Empty; 246 | try 247 | { 248 | Uri uri = new Uri(ServiceUri, "/indexes/" + SourceIndexName); 249 | HttpResponseMessage response = AzureSearchHelper.SendSearchRequest(HttpClient, HttpMethod.Get, uri); 250 | AzureSearchHelper.EnsureSuccessfulSearchResponse(response); 251 | Schema = response.Content.ReadAsStringAsync().Result.ToString(); 252 | 253 | } 254 | catch (Exception ex) 255 | { 256 | 
Console.WriteLine("Error: {0}", ex.Message.ToString()); 257 | } 258 | 259 | return Schema; 260 | } 261 | 262 | private static bool DeleteIndex() 263 | { 264 | Console.WriteLine("\n Delete target index {0} in {1} search service, if it exists", TargetIndexName, TargetSearchServiceName); 265 | // Delete the index if it exists 266 | try 267 | { 268 | TargetSearchClient.Indexes.Delete(TargetIndexName); 269 | } 270 | catch (Exception ex) 271 | { 272 | Console.WriteLine(" Error deleting index: {0}\n", ex.Message); 273 | Console.WriteLine(" Did you remember to set your SearchServiceName and SearchServiceApiKey?\n"); 274 | return false; 275 | } 276 | 277 | return true; 278 | } 279 | 280 | static void CreateTargetIndex() 281 | { 282 | Console.WriteLine("\n Create target index {0} in {1} search service", TargetIndexName, TargetSearchServiceName); 283 | // Use the schema file to create a copy of this index 284 | // I like using REST here since I can just take the response as-is 285 | 286 | 287 | string json = File.ReadAllText(Path.Combine(BackupDirectory, SourceIndexName + ".schema")); 288 | 289 | 290 | // Do some cleaning of this file to change index name, etc 291 | json = "{" + json.Substring(json.IndexOf("\"name\"")); 292 | int indexOfIndexName = json.IndexOf("\"", json.IndexOf("name\"") + 5) + 1; 293 | int indexOfEndOfIndexName = json.IndexOf("\"", indexOfIndexName); 294 | json = json.Substring(0, indexOfIndexName) + TargetIndexName + json.Substring(indexOfEndOfIndexName); 295 | 296 | Uri ServiceUri = new Uri("https://" + TargetSearchServiceName + ".search.windows.net"); 297 | HttpClient HttpClient = new HttpClient(); 298 | HttpClient.DefaultRequestHeaders.Add("api-key", TargetAdminKey); 299 | 300 | try 301 | { 302 | Uri uri = new Uri(ServiceUri, "/indexes"); 303 | HttpResponseMessage response = AzureSearchHelper.SendSearchRequest(HttpClient, HttpMethod.Post, uri, json); 304 | response.EnsureSuccessStatusCode(); 305 | } 306 | catch (Exception ex) 307 | { 308 | Console.WriteLine(" Error: {0}", ex.Message.ToString()); 309 | } 310 | 311 | } 312 | static int GetCurrentDocCount(ISearchIndexClient IndexClient) 313 | { 314 | 315 | // Get the current doc count of the specified index 316 | try 317 | { 318 | SearchParameters sp = new SearchParameters() 319 | { 320 | SearchMode = SearchMode.All, 321 | IncludeTotalResultCount = true 322 | }; 323 | 324 | DocumentSearchResult response = IndexClient.Documents.Search("*", sp); 325 | return Convert.ToInt32(response.Count); 326 | 327 | } 328 | catch (Exception ex) 329 | { 330 | Console.WriteLine(" Error: {0}", ex.Message.ToString()); 331 | } 332 | 333 | return -1; 334 | 335 | } 336 | static void ImportFromJSON() 337 | { 338 | Console.WriteLine("\n Upload index documents from saved JSON files"); 339 | // Take JSON file and import this as-is to target index 340 | Uri ServiceUri = new Uri("https://" + TargetSearchServiceName + ".search.windows.net"); 341 | HttpClient HttpClient = new HttpClient(); 342 | HttpClient.DefaultRequestHeaders.Add("api-key", TargetAdminKey); 343 | 344 | try 345 | { 346 | foreach (string fileName in Directory.GetFiles(BackupDirectory, SourceIndexName + "*.json")) 347 | { 348 | Console.WriteLine(" -Uploading documents from file {0}", fileName); 349 | string json = File.ReadAllText(fileName); 350 | Uri uri = new Uri(ServiceUri, "/indexes/" + TargetIndexName + "/docs/index"); 351 | HttpResponseMessage response = AzureSearchHelper.SendSearchRequest(HttpClient, HttpMethod.Post, uri, json); 352 | response.EnsureSuccessStatusCode(); 353 | } 354 | } 
355 |         catch (Exception ex)
356 |         {
357 |             Console.WriteLine("  Error: {0}", ex.Message.ToString());
358 |         }
359 |     }
360 | }
361 | }
362 | 
--------------------------------------------------------------------------------
/index-backup-restore/v10/AzureSearchBackupRestoreIndex/appsettings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "SourceSearchServiceName": "",
 3 |   "SourceAdminKey": "",
 4 |   "SourceIndexName": "",
 5 |   "TargetSearchServiceName": "",
 6 |   "TargetAdminKey": "",
 7 |   "TargetIndexName": "",
 8 |   "BackupDirectory": ""
 9 | }
10 | 
11 | 
--------------------------------------------------------------------------------
/index-backup-restore/v11/AzureSearchBackupRestoreIndex.sln:
--------------------------------------------------------------------------------
 1 | 
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio Version 16
 4 | VisualStudioVersion = 16.0.28803.352
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AzureSearchBackupRestoreIndex", "AzureSearchBackupRestoreIndex\AzureSearchBackupRestoreIndex.csproj", "{3BA854A7-9DF4-4BE1-9749-E51A49AE2E16}"
 7 | EndProject
 8 | Global
 9 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | 		Debug|Any CPU = Debug|Any CPU
11 | 		Release|Any CPU = Release|Any CPU
12 | 	EndGlobalSection
13 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | 		{3BA854A7-9DF4-4BE1-9749-E51A49AE2E16}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15 | 		{3BA854A7-9DF4-4BE1-9749-E51A49AE2E16}.Debug|Any CPU.Build.0 = Debug|Any CPU
16 | 		{3BA854A7-9DF4-4BE1-9749-E51A49AE2E16}.Release|Any CPU.ActiveCfg = Release|Any CPU
17 | 		{3BA854A7-9DF4-4BE1-9749-E51A49AE2E16}.Release|Any CPU.Build.0 = Release|Any CPU
18 | 	EndGlobalSection
19 | 	GlobalSection(SolutionProperties) = preSolution
20 | 		HideSolutionNode = FALSE
21 | 	EndGlobalSection
22 | 	GlobalSection(ExtensibilityGlobals) = postSolution
23 | 		SolutionGuid = {CB63787D-86CA-48E4-849F-371290479660}
24 | 	EndGlobalSection
25 | EndGlobal
26 | 
--------------------------------------------------------------------------------
/index-backup-restore/v11/AzureSearchBackupRestoreIndex/AzureSearchBackupRestoreIndex.csproj:
--------------------------------------------------------------------------------
 1 | <Project Sdk="Microsoft.NET.Sdk">
 2 | 
 3 |   <PropertyGroup>
 4 |     <OutputType>Exe</OutputType>
 5 |     <TargetFramework>net8.0</TargetFramework>
 6 |   </PropertyGroup>
 7 | 
 8 |   <ItemGroup>
 9 |     <!-- Package versions are not pinned in this listing; use current stable releases. -->
10 |     <PackageReference Include="Azure.Search.Documents" />
11 |     <PackageReference Include="Microsoft.Extensions.Configuration.Json" />
12 |     <PackageReference Include="Polly" />
13 |   </ItemGroup>
14 | 
15 |   <ItemGroup>
16 |     <None Update="appsettings.json">
17 |       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
18 |     </None>
19 |   </ItemGroup>
20 | 
21 | </Project>
22 | 
--------------------------------------------------------------------------------
/index-backup-restore/v11/AzureSearchBackupRestoreIndex/AzureSearchHelper.cs:
--------------------------------------------------------------------------------
 1 | //Copyright 2019 Microsoft
 2 | 
 3 | //Licensed under the Apache License, Version 2.0 (the "License");
 4 | //you may not use this file except in compliance with the License.
 5 | //You may obtain a copy of the License at
 6 | 
 7 | //    http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | //Unless required by applicable law or agreed to in writing, software
10 | //distributed under the License is distributed on an "AS IS" BASIS,
11 | //WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | //See the License for the specific language governing permissions and
13 | //limitations under the License.
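// AzureSearchHelper centralizes the raw REST calls made by this tool: every
// request is stamped with the preview api-version defined below and executed
// through a Polly retry policy (4 retries with exponential backoff of 2, 4, 8,
// and 16 seconds) so that transient failures on large document batches do not
// abort a migration.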
14 | 
15 | using System;
16 | using System.Net.Http;
17 | using System.Text;
18 | using System.Text.Json;
19 | using System.Text.Json.Serialization;
20 | using Polly;
21 | using Polly.Retry;
22 | 
23 | namespace AzureSearchBackupRestoreIndex;
24 | 
25 | public class AzureSearchHelper
26 | {
27 |     // Azure AI Search requires that we use at least the API version "2024-03-01-Preview" to access all index features.
28 |     // Hence, this API version string is used for requests to Azure AI Search.
29 |     private const string ApiVersionString = "api-version=2024-03-01-Preview";
30 | 
31 |     private static readonly JsonSerializerOptions JsonOptions;
32 | 
33 |     // Retry policy to improve document migration resilience
34 |     // AI Search may fail to process large batches
35 |     private static readonly AsyncRetryPolicy<HttpResponseMessage> RetryPolicy = Policy
36 |         .HandleResult<HttpResponseMessage>(r => !r.IsSuccessStatusCode)
37 |         .Or<HttpRequestException>()
38 |         .WaitAndRetryAsync(4, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt)));
39 | 
40 |     static AzureSearchHelper()
41 |     {
42 |         JsonOptions = new JsonSerializerOptions { };
43 | 
44 |         JsonOptions.Converters.Add(new JsonStringEnumConverter());
45 |     }
46 | 
47 |     public static HttpResponseMessage SendSearchRequest(HttpClient client, HttpMethod method, Uri uri, string json = null)
48 |     {
49 |         UriBuilder builder = new UriBuilder(uri);
50 |         string separator = string.IsNullOrWhiteSpace(builder.Query) ? string.Empty : "&";
51 |         builder.Query = builder.Query.TrimStart('?') + separator + ApiVersionString;
52 | 
53 |         HttpResponseMessage response = RetryPolicy.ExecuteAsync(async () =>
54 |         {
55 |             var request = new HttpRequestMessage(method, builder.Uri);
56 | 
57 |             if (json != null)
58 |             {
59 |                 request.Content = new StringContent(json, Encoding.UTF8, "application/json");
60 |             }
61 | 
62 |             return await client.SendAsync(request);
63 |         }).GetAwaiter().GetResult();
64 | 
65 |         return response;
66 |     }
67 | 
68 |     public static void EnsureSuccessfulSearchResponse(HttpResponseMessage response)
69 |     {
70 |         if (!response.IsSuccessStatusCode)
71 |         {
72 |             string error = response.Content?.ReadAsStringAsync().Result;
73 |             throw new Exception("Search request failed: " + error);
74 |         }
75 |     }
76 | }
--------------------------------------------------------------------------------
/index-backup-restore/v11/AzureSearchBackupRestoreIndex/Program.cs:
--------------------------------------------------------------------------------
 1 | // This is a prototype tool that allows for extraction of data from a search index
 2 | // Since this tool is still under development, it should not be used for production usage
 3 | 
 4 | using System;
 5 | using System.Collections.Generic;
 6 | using System.IO;
 7 | using System.Linq;
 8 | using System.Net.Http;
 9 | using System.Text.Json;
10 | using System.Threading;
11 | using System.Threading.Tasks;
12 | using Azure;
13 | using Azure.Search.Documents;
14 | using Azure.Search.Documents.Indexes;
15 | using Azure.Search.Documents.Models;
16 | using Microsoft.Extensions.Configuration;
17 | 
18 | namespace AzureSearchBackupRestoreIndex;
19 | 
20 | class Program
21 | {
22 |     private static string SourceSearchServiceName;
23 |     private static string SourceAdminKey;
24 |     private static string SourceIndexName;
25 |     private static string TargetSearchServiceName;
26 |     private static string TargetAdminKey;
27 |     private static string TargetIndexName;
28 |     private static string BackupDirectory;
29 | 
30 |     private static SearchIndexClient SourceIndexClient;
31 |     private static SearchClient SourceSearchClient;
32 |     private static 
SearchIndexClient TargetIndexClient; 33 | private static SearchClient TargetSearchClient; 34 | 35 | private static int MaxBatchSize = 500; // JSON files will contain this many documents / file and can be up to 1000 36 | private static int ParallelizedJobs = 10; // Output content in parallel jobs 37 | 38 | static void Main() 39 | { 40 | //Get source and target search service info and index names from appsettings.json file 41 | //Set up source and target search service clients 42 | ConfigurationSetup(); 43 | 44 | //Backup the source index 45 | Console.WriteLine("\nSTART INDEX BACKUP"); 46 | BackupIndexAndDocuments(); 47 | 48 | //Recreate and import content to target index 49 | Console.WriteLine("\nSTART INDEX RESTORE"); 50 | DeleteIndex(); 51 | CreateTargetIndex(); 52 | ImportFromJSON(); 53 | Console.WriteLine("\n Waiting 10 seconds for target to index content..."); 54 | Console.WriteLine(" NOTE: For really large indexes it may take longer to index all content.\n"); 55 | Thread.Sleep(10000); 56 | 57 | // Validate all content is in target index 58 | int sourceCount = GetCurrentDocCount(SourceSearchClient); 59 | int targetCount = GetCurrentDocCount(TargetSearchClient); 60 | Console.WriteLine("\nSAFEGUARD CHECK: Source and target index counts should match"); 61 | Console.WriteLine(" Source index contains {0} docs", sourceCount); 62 | Console.WriteLine(" Target index contains {0} docs\n", targetCount); 63 | 64 | Console.WriteLine("Press any key to continue..."); 65 | Console.ReadLine(); 66 | } 67 | 68 | static void ConfigurationSetup() 69 | { 70 | 71 | IConfigurationBuilder builder = new ConfigurationBuilder().AddJsonFile("appsettings.json"); 72 | IConfigurationRoot configuration = builder.Build(); 73 | 74 | SourceSearchServiceName = configuration["SourceSearchServiceName"]; 75 | SourceAdminKey = configuration["SourceAdminKey"]; 76 | SourceIndexName = configuration["SourceIndexName"]; 77 | TargetSearchServiceName = configuration["TargetSearchServiceName"]; 78 | TargetAdminKey = configuration["TargetAdminKey"]; 79 | TargetIndexName = configuration["TargetIndexName"]; 80 | BackupDirectory = configuration["BackupDirectory"]; 81 | 82 | Console.WriteLine("CONFIGURATION:"); 83 | Console.WriteLine("\n Source service and index {0}, {1}", SourceSearchServiceName, SourceIndexName); 84 | Console.WriteLine("\n Target service and index: {0}, {1}", TargetSearchServiceName, TargetIndexName); 85 | Console.WriteLine("\n Backup directory: " + BackupDirectory); 86 | Console.WriteLine("\nDoes this look correct? 
Press any key to continue, Ctrl+C to cancel.");
 87 |         Console.ReadLine();
 88 | 
 89 |         SourceIndexClient = new SearchIndexClient(new Uri("https://" + SourceSearchServiceName + ".search.windows.net"), new AzureKeyCredential(SourceAdminKey));
 90 |         SourceSearchClient = SourceIndexClient.GetSearchClient(SourceIndexName);
 91 | 
 92 | 
 93 |         TargetIndexClient = new SearchIndexClient(new Uri("https://" + TargetSearchServiceName + ".search.windows.net"), new AzureKeyCredential(TargetAdminKey));
 94 |         TargetSearchClient = TargetIndexClient.GetSearchClient(TargetIndexName);
 95 |     }
 96 | 
 97 |     static void BackupIndexAndDocuments()
 98 |     {
 99 |         // Backup the index schema to the specified backup directory
100 |         Console.WriteLine("\n  Backing up source index schema to {0}\n", Path.Combine(BackupDirectory, SourceIndexName + ".schema"));
101 | 
102 |         File.WriteAllText(Path.Combine(BackupDirectory, SourceIndexName + ".schema"), GetIndexSchema());
103 | 
104 |         // Extract the content to JSON files
105 |         int SourceDocCount = GetCurrentDocCount(SourceSearchClient);
106 |         WriteIndexDocuments(SourceDocCount);     // Output content from index to json files
107 |     }
108 | 
109 |     static void WriteIndexDocuments(int CurrentDocCount)
110 |     {
111 |         // Write document files in batches (per MaxBatchSize) in parallel
112 |         int FileCounter = 0;
113 |         for (int batch = 0; batch <= (CurrentDocCount / MaxBatchSize); batch += ParallelizedJobs)
114 |         {
115 | 
116 |             List<Task> tasks = new List<Task>();
117 |             for (int job = 0; job < ParallelizedJobs; job++)
118 |             {
119 |                 FileCounter++;
120 |                 int fileCounter = FileCounter;
121 |                 if ((fileCounter - 1) * MaxBatchSize < CurrentDocCount)
122 |                 {
123 |                     Console.WriteLine("  Backing up source documents to {0} - (batch size = {1})", Path.Combine(BackupDirectory, SourceIndexName + fileCounter + ".json"), MaxBatchSize);
124 | 
125 |                     tasks.Add(Task.Factory.StartNew(() =>
126 |                         ExportToJSON((fileCounter - 1) * MaxBatchSize, Path.Combine(BackupDirectory, $"{SourceIndexName}{fileCounter}.json"))
127 |                     ));
128 |                 }
129 | 
130 |             }
131 |             Task.WaitAll(tasks.ToArray());  // Wait for all the export tasks in this group to complete
132 |         }
133 | 
134 |         return;
135 |     }
136 | 
137 |     static void ExportToJSON(int Skip, string FileName)
138 |     {
139 |         // Extract all the documents from the selected index to JSON files in batches of 500 docs / file
140 |         string json = string.Empty;
141 |         try
142 |         {
143 |             SearchOptions options = new SearchOptions()
144 |             {
145 |                 SearchMode = SearchMode.All,
146 |                 Size = MaxBatchSize,
147 |                 Skip = Skip
148 |             };
149 | 
150 |             SearchResults<SearchDocument> response = SourceSearchClient.Search<SearchDocument>("*", options);
151 | 
152 |             foreach (var doc in response.GetResults())
153 |             {
154 |                 json += JsonSerializer.Serialize(doc.Document) + ",";
155 |                 json = json.Replace("\"Latitude\":", "\"type\": \"Point\", \"coordinates\": [");
156 |                 json = json.Replace("\"Longitude\":", "");
157 |                 json = json.Replace(",\"IsEmpty\":false,\"Z\":null,\"M\":null,\"CoordinateSystem\":{\"EpsgId\":4326,\"Id\":\"4326\",\"Name\":\"WGS84\"}", "]");
158 |                 json += "\n";
159 |             }
160 | 
161 |             // Output the formatted content to a file
162 |             json = json.Substring(0, json.Length - 2); // remove the trailing ",\n" left by the last document
163 |             File.WriteAllText(FileName, "{\"value\": [");
164 |             File.AppendAllText(FileName, json);
165 |             File.AppendAllText(FileName, "]}");
166 |             Console.WriteLine("  Total documents: {0}", response.GetResults().Count().ToString());
167 |             json = string.Empty;
168 |         }
169 |         catch (Exception ex)
170 |         {
171 |             Console.WriteLine("Error: {0}", ex.Message);
172 |         }
173 |     }
174 | 
175 |     static 
string GetIDFieldName() 176 | { 177 | // Find the id field of this index 178 | string IDFieldName = string.Empty; 179 | try 180 | { 181 | var schema = SourceIndexClient.GetIndex(SourceIndexName); 182 | foreach (var field in schema.Value.Fields) 183 | { 184 | if (field.IsKey == true) 185 | { 186 | IDFieldName = Convert.ToString(field.Name); 187 | break; 188 | } 189 | } 190 | 191 | } 192 | catch (Exception ex) 193 | { 194 | Console.WriteLine("Error: {0}", ex.Message); 195 | } 196 | 197 | return IDFieldName; 198 | } 199 | 200 | static string GetIndexSchema() 201 | { 202 | // Extract the schema for this index 203 | // We use REST here because we can take the response as-is 204 | 205 | Uri ServiceUri = new Uri("https://" + SourceSearchServiceName + ".search.windows.net"); 206 | HttpClient HttpClient = new HttpClient(); 207 | HttpClient.DefaultRequestHeaders.Add("api-key", SourceAdminKey); 208 | 209 | string Schema = string.Empty; 210 | try 211 | { 212 | Uri uri = new Uri(ServiceUri, "/indexes/" + SourceIndexName); 213 | HttpResponseMessage response = AzureSearchHelper.SendSearchRequest(HttpClient, HttpMethod.Get, uri); 214 | AzureSearchHelper.EnsureSuccessfulSearchResponse(response); 215 | Schema = response.Content.ReadAsStringAsync().Result; 216 | } 217 | catch (Exception ex) 218 | { 219 | Console.WriteLine("Error: {0}", ex.Message); 220 | } 221 | 222 | return Schema; 223 | } 224 | 225 | private static bool DeleteIndex() 226 | { 227 | Console.WriteLine("\n Delete target index {0} in {1} search service, if it exists", TargetIndexName, TargetSearchServiceName); 228 | // Delete the index if it exists 229 | try 230 | { 231 | TargetIndexClient.DeleteIndex(TargetIndexName); 232 | } 233 | catch (Exception ex) 234 | { 235 | Console.WriteLine(" Error deleting index: {0}\n", ex.Message); 236 | Console.WriteLine(" Did you remember to set your SearchServiceName and SearchServiceApiKey?\n"); 237 | return false; 238 | } 239 | 240 | return true; 241 | } 242 | 243 | static void CreateTargetIndex() 244 | { 245 | Console.WriteLine("\n Create target index {0} in {1} search service", TargetIndexName, TargetSearchServiceName); 246 | // Use the schema file to create a copy of this index 247 | // I like using REST here since I can just take the response as-is 248 | 249 | string json = File.ReadAllText(Path.Combine(BackupDirectory, SourceIndexName + ".schema")); 250 | 251 | // Do some cleaning of this file to change index name, etc 252 | json = "{" + json.Substring(json.IndexOf("\"name\"")); 253 | int indexOfIndexName = json.IndexOf("\"", json.IndexOf("name\"") + 5) + 1; 254 | int indexOfEndOfIndexName = json.IndexOf("\"", indexOfIndexName); 255 | json = json.Substring(0, indexOfIndexName) + TargetIndexName + json.Substring(indexOfEndOfIndexName); 256 | 257 | Uri ServiceUri = new Uri("https://" + TargetSearchServiceName + ".search.windows.net"); 258 | HttpClient HttpClient = new HttpClient(); 259 | HttpClient.DefaultRequestHeaders.Add("api-key", TargetAdminKey); 260 | 261 | try 262 | { 263 | Uri uri = new Uri(ServiceUri, "/indexes"); 264 | HttpResponseMessage response = AzureSearchHelper.SendSearchRequest(HttpClient, HttpMethod.Post, uri, json); 265 | response.EnsureSuccessStatusCode(); 266 | } 267 | catch (Exception ex) 268 | { 269 | Console.WriteLine(" Error: {0}", ex.Message); 270 | } 271 | } 272 | 273 | static int GetCurrentDocCount(SearchClient searchClient) 274 | { 275 | // Get the current doc count of the specified index 276 | try 277 | { 278 | SearchOptions options = new SearchOptions 279 | { 280 | SearchMode 
= SearchMode.All,
281 |                 IncludeTotalCount = true
282 |             };
283 | 
284 |             SearchResults<Dictionary<string, object>> response = searchClient.Search<Dictionary<string, object>>("*", options);
285 |             return Convert.ToInt32(response.TotalCount);
286 |         }
287 |         catch (Exception ex)
288 |         {
289 |             Console.WriteLine("  Error: {0}", ex.Message);
290 |         }
291 | 
292 |         return -1;
293 |     }
294 | 
295 |     static void ImportFromJSON()
296 |     {
297 |         Console.WriteLine("\n  Upload index documents from saved JSON files");
298 |         // Take JSON file and import this as-is to target index
299 |         Uri ServiceUri = new Uri("https://" + TargetSearchServiceName + ".search.windows.net");
300 |         HttpClient HttpClient = new HttpClient();
301 |         HttpClient.DefaultRequestHeaders.Add("api-key", TargetAdminKey);
302 | 
303 |         try
304 |         {
305 |             foreach (string fileName in Directory.GetFiles(BackupDirectory, SourceIndexName + "*.json"))
306 |             {
307 |                 Console.WriteLine("  -Uploading documents from file {0}", fileName);
308 |                 string json = File.ReadAllText(fileName);
309 |                 Uri uri = new Uri(ServiceUri, "/indexes/" + TargetIndexName + "/docs/index");
310 |                 HttpResponseMessage response = AzureSearchHelper.SendSearchRequest(HttpClient, HttpMethod.Post, uri, json);
311 |                 response.EnsureSuccessStatusCode();
312 |             }
313 |         }
314 |         catch (Exception ex)
315 |         {
316 |             Console.WriteLine("  Error: {0}", ex.Message);
317 |         }
318 |     }
319 | }
320 | 
--------------------------------------------------------------------------------
/index-backup-restore/v11/AzureSearchBackupRestoreIndex/appsettings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "SourceSearchServiceName": "",
 3 |   "SourceAdminKey": "",
 4 |   "SourceIndexName": "",
 5 |   "TargetSearchServiceName": "",
 6 |   "TargetAdminKey": "",
 7 |   "TargetIndexName": "",
 8 |   "BackupDirectory": ""
 9 | }
10 | 
--------------------------------------------------------------------------------
/search-aggregations/Program.cs:
--------------------------------------------------------------------------------
 1 | // See https://aka.ms/new-console-template for more information
 2 | 
 3 | using Azure;
 4 | using Azure.Search.Documents;
 5 | using Azure.Search.Documents.Indexes;
 6 | using Azure.Search.Documents.Indexes.Models;
 7 | using Azure.Search.Documents.Models;
 8 | using MathNet.Numerics.Statistics;
 9 | using Microsoft.Extensions.Configuration;
10 | using System.Globalization;
11 | 
12 | // Load app settings
13 | IConfigurationRoot appSettings = new ConfigurationBuilder()
14 |     .AddJsonFile("appsettings.json")
15 |     .Build();
16 | // Search service endpoint
17 | var endpoint = new Uri(appSettings["searchServiceUrl"]);
18 | // Admin key to search service
19 | var credential = new AzureKeyCredential(appSettings["adminKey"]);
20 | // Number of samples to upload to the index
21 | var sampleValueCount = long.Parse(appSettings["sampleValueCount"]);
22 | // Maximum value to generate as a sample
23 | var sampleValueMax = double.Parse(appSettings["sampleValueMax"]);
24 | // Minimum value to generate as a sample
25 | var sampleValueMin = double.Parse(appSettings["sampleValueMin"]);
26 | // Name of index to store samples
27 | string sampleIndexName = appSettings["sampleIndexName"];
28 | 
29 | // Create sample index schema
30 | var sampleIndex = new SearchIndex(sampleIndexName)
31 | {
32 |     Fields =
33 |     {
34 |         new SearchField("id", SearchFieldDataType.String) { IsKey = true },
35 |         new SearchField("value", SearchFieldDataType.Double) { IsFilterable = true }
36 |     }
37 | };
38 | Console.WriteLine("Dropping and recreating sample index...");
39 | var 
searchIndexClient = new SearchIndexClient(endpoint, credential);
40 | await searchIndexClient.DeleteIndexAsync(sampleIndexName);
41 | await searchIndexClient.CreateIndexAsync(sampleIndex);
42 | 
43 | // Create sample values
44 | var sampleValues = new double[sampleValueCount];
45 | var random = new Random();
46 | for (int i = 0; i < sampleValueCount; i++)
47 | {
48 |     sampleValues[i] = (random.NextDouble() * (sampleValueMax - sampleValueMin)) + sampleValueMin;
49 | }
50 | 
51 | // Helper function: Create Search Documents from samples
52 | IEnumerable<SearchDocument> GetDocuments()
53 | {
54 |     for (int i = 0; i < sampleValues.Length; i++)
55 |     {
56 |         double sampleValue = sampleValues[i];
57 |         yield return new SearchDocument
58 |         {
59 |             ["id"] = i.ToString(CultureInfo.InvariantCulture),
60 |             ["value"] = sampleValue
61 |         };
62 |     }
63 | }
64 | 
65 | Console.WriteLine("Uploading samples to sample index...");
66 | var searchClient = new SearchClient(endpoint, sampleIndexName, credential);
67 | IndexDocumentsResult response = await searchClient.UploadDocumentsAsync(GetDocuments());
68 | if (response.Results.Any(result => !result.Succeeded))
69 | {
70 |     throw new RequestFailedException($"Failed to upload documents, error {response.Results.First(result => !result.Succeeded).ErrorMessage}");
71 | }
72 | 
73 | // Wait a few seconds before querying documents that were just uploaded
74 | // Learn more at https://learn.microsoft.com/rest/api/searchservice/addupdate-or-delete-documents#response
75 | TimeSpan delay = TimeSpan.FromSeconds(5);
76 | Console.WriteLine("Waiting {0} seconds before computing statistics...", delay.TotalSeconds);
77 | await Task.Delay(delay);
78 | 
79 | Console.WriteLine("Computing statistics for all samples...");
80 | await GetAggregateStatisticsUsingPaging(
81 |     sampleValues,
82 |     await searchClient.SearchAsync<SearchDocument>("*"));
83 | 
84 | // Use filters to restrict which values are queried
85 | double halfPoint = (sampleValueMax + sampleValueMin) / 2.0;
86 | Console.WriteLine("Computing statistics for all samples less than {0:.##}...", halfPoint);
87 | await GetAggregateStatisticsUsingPaging(
88 |     sampleValues.Where(sample => sample < halfPoint),
89 |     await searchClient.SearchAsync<SearchDocument>(
90 |         "*",
91 |         options: new SearchOptions
92 |         {
93 |             Filter = SearchFilter.Create($"value lt {halfPoint}")
94 |         }));
95 | 
96 | // Page through a query and compute statistics from specific values in the result
97 | // Note that you cannot page through more than 100,000 values at a time.
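// If more than 100,000 documents can match, a common workaround (not shown
// here) is to sort on a unique, sortable field and repeatedly re-issue the
// query with a range filter starting after the last value already seen,
// rather than relying on skip-based paging.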
 98 | // To learn more see https://learn.microsoft.com/dotnet/api/azure.search.documents.searchoptions.skip
 99 | async Task GetAggregateStatisticsUsingPaging(IEnumerable<double> sampleValues, SearchResults<SearchDocument> searchResults)
100 | {
101 |     var runningStatistics = new RunningStatistics();
102 |     double sum = 0;
103 |     AsyncPageable<SearchResult<SearchDocument>> resultPages = searchResults.GetResultsAsync();
104 |     await foreach (Page<SearchResult<SearchDocument>> results in resultPages.AsPages())
105 |     {
106 |         double[] pageValues = results.Values.Select(result => result.Document["value"]).Cast<double>().ToArray();
107 |         runningStatistics.PushRange(pageValues);
108 |         sum += pageValues.Sum();
109 |     }
110 | 
111 |     Console.WriteLine("Expected Count: {0}, Aggregated Count: {1}", sampleValues.Count(), runningStatistics.Count);
112 |     Console.WriteLine("Expected Average: {0:.##}, Aggregated Average: {1:.##}", sampleValues.Average(), runningStatistics.Mean);
113 |     Console.WriteLine("Expected Min: {0:.##}, Aggregated Min: {1:.##}", sampleValues.Min(), runningStatistics.Minimum);
114 |     Console.WriteLine("Expected Max: {0:.##}, Aggregated Max: {1:.##}", sampleValues.Max(), runningStatistics.Maximum);
115 |     Console.WriteLine("Expected Sum: {0:.##}, Aggregated Sum: {1:.##}", sampleValues.Sum(), sum);
116 | }
117 | 
--------------------------------------------------------------------------------
/search-aggregations/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | page_type: sample
 3 | languages:
 4 | - csharp
 5 | name: Compute aggregations over a search index
 6 | description: "Compute aggregations such as mean over a search index using a query."
 7 | products:
 8 | - azure
 9 | - azure-cognitive-search
10 | ---
11 | 
12 | # Compute aggregations over a search index
13 | 
14 | ![MIT license badge](https://img.shields.io/badge/license-MIT-green.svg)
15 | 
16 | This sample creates a simple search index, uploads randomly generated data to it, and shows how aggregations can be computed over that data and how the data can be filtered using a query.
17 | 
18 | ## Prerequisites
19 | 
20 | + [Azure AI Search](search-create-app-portal.md)
21 | + [Visual Studio](https://visualstudio.microsoft.com/downloads/)
22 | + [Azure.Search.Documents NuGet package](https://www.nuget.org/packages/Azure.Search.Documents/)
23 | 
24 | In contrast with other tutorials, this one uses an index with randomly generated data, so no preliminary index setup or data preparation is required.
25 | 
26 | ## Setup
27 | 
28 | 1. Clone or download this sample repository.
29 | 1. Extract contents if the download is a zip file. Make sure the files are read-write.
30 | 
31 | ## Run the sample
32 | 
33 | 1. Open the solution in Visual Studio.
34 | 
35 | 1. Modify **appsettings.json** to use your search service URL and admin API key. The URL is a full address in the format `https://<service-name>.search.windows.net`. The admin API key is an alphanumeric string that you can obtain from the portal, PowerShell, or the CLI.
36 | 
37 |     ```json
38 |     {
39 |       "searchServiceUrl": "<YOUR-SEARCH-SERVICE-URL>",
40 |       "adminKey": "<YOUR-ADMIN-API-KEY>"
41 |     }
42 |     ```
43 | 
44 | 1. Press **F5** to compile and run the project.
45 | 
46 | ## Next steps
47 | 
48 | You can learn more about Azure AI Search on the [official documentation site](https://docs.microsoft.com/azure/search).
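As a next step, if you only need a few simple aggregates and want to avoid the MathNet dependency, the same paging pattern reduces to a plain fold over the results. The sketch below is a minimal, self-contained variant that assumes the `example-aggregations` index created by this sample (with its filterable `value` field); the service URL and admin key are placeholders you must fill in:

```csharp
using Azure;
using Azure.Search.Documents;
using Azure.Search.Documents.Models;

// Placeholder endpoint, key, and index name; replace with your own values.
var client = new SearchClient(
    new Uri("https://<your-service>.search.windows.net"),
    "example-aggregations",
    new AzureKeyCredential("<your-admin-key>"));

// Aggregate every document whose filterable "value" field is below 50.
var options = new SearchOptions { Filter = SearchFilter.Create($"value lt {50.0}") };
SearchResults<SearchDocument> results = await client.SearchAsync<SearchDocument>("*", options);

long count = 0;
double sum = 0, min = double.MaxValue, max = double.MinValue;
await foreach (SearchResult<SearchDocument> result in results.GetResultsAsync())
{
    double value = (double)result.Document["value"];
    count++;
    sum += value;
    min = Math.Min(min, value);
    max = Math.Max(max, value);
}

Console.WriteLine($"count={count} mean={sum / count:0.##} min={min:0.##} max={max:0.##}");
```

Because `GetResultsAsync` handles continuation between result pages internally, the fold never materializes the whole result set in memory, which is the same property the `RunningStatistics`-based version above relies on.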
--------------------------------------------------------------------------------
/search-aggregations/appsettings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "searchServiceUrl": "",
3 |   "adminKey": "",
4 |   "sampleValueCount": 1000,
5 |   "sampleValueMax": 100,
6 |   "sampleValueMin": 1,
7 |   "sampleIndexName": "example-aggregations"
8 | }
--------------------------------------------------------------------------------
/search-aggregations/search-aggregations.csproj:
--------------------------------------------------------------------------------
 1 | <Project Sdk="Microsoft.NET.Sdk">
 2 | 
 3 |   <PropertyGroup>
 4 |     <OutputType>Exe</OutputType>
 5 |     <TargetFramework>net6.0</TargetFramework>
 6 |     <RootNamespace>search_aggregations</RootNamespace>
 7 |     <ImplicitUsings>enable</ImplicitUsings>
 8 |     <Nullable>disable</Nullable>
 9 |   </PropertyGroup>
10 | 
11 |   <ItemGroup>
12 |     <!-- Package versions are not pinned in this listing; use current stable releases. -->
13 |     <PackageReference Include="Azure.Search.Documents" />
14 |     <PackageReference Include="MathNet.Numerics" />
15 |     <PackageReference Include="Microsoft.Extensions.Configuration.Json" />
16 |   </ItemGroup>
17 | 
18 |   <ItemGroup>
19 |     <None Update="appsettings.json">
20 |       <CopyToOutputDirectory>Always</CopyToOutputDirectory>
21 |     </None>
22 |   </ItemGroup>
23 | 
24 | </Project>
--------------------------------------------------------------------------------
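The aggregations project can also be built and run from the command line with the .NET SDK instead of Visual Studio, once **appsettings.json** has been filled in:

```shell
cd search-aggregations
dotnet run
```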