├── .vscode └── settings.json ├── 02-migrate ├── .env ├── create-azure-resources.ps1 └── create-mongodb-vcore-cluster.bicep ├── 03-manage ├── .env ├── create-azure-resources.ps1 ├── create-mongodb-vcore-cluster.bicep ├── node.js │ ├── Blobs │ │ └── loadData.js │ ├── Workload │ │ └── runRandomCRUD.js │ ├── load-data-run-workload.js │ └── package.json └── python │ ├── Blobs │ └── loadData.py │ ├── Workload │ └── runRandomCRUD.py │ └── load-data-run-workload.py ├── 04-vector-search ├── .env ├── create-azure-resources.ps1 ├── create-mongodb-vcore-cluster.bicep ├── node.js │ ├── Blobs │ │ ├── loadAndVectorize.js │ │ └── webDownload.js │ ├── Collections │ │ ├── customers.js │ │ ├── products.js │ │ └── salesOrders.js │ ├── SearchComponents │ │ ├── completion.js │ │ ├── embeddings.js │ │ ├── indexes.js │ │ ├── searches.js │ │ └── vectorSearch.js │ ├── load-and-vectorize-data.js │ └── package.json └── python │ ├── Blobs │ ├── loadAndVectorize.py │ └── webDownload.py │ ├── Collections │ ├── customers.py │ ├── products.py │ └── salesOrders.py │ ├── SearchComponents │ ├── completion.py │ ├── embeddings.py │ ├── indexes.py │ ├── searches.py │ └── vectorSearch.py │ └── load-and-vectorize-data.py ├── 05-deploy-with-aks ├── create-azure-resources.ps1 ├── create-mongodb-vcore-cluster.bicep └── node.js │ ├── .env │ ├── .gitignore │ ├── Blobs │ ├── loadAndVectorize.js │ └── webDownload.js │ ├── Collections │ ├── customers.js │ ├── products.js │ └── salesOrders.js │ ├── SearchComponents │ ├── completion.js │ ├── embeddings.js │ ├── indexes.js │ ├── searches.js │ └── vectorSearch.js │ ├── app.js │ ├── load-and-vectorize-data.js │ ├── package-lock.json │ ├── package.json │ └── public │ ├── images │ ├── bike-header.png │ ├── bike1.png │ ├── bike2.png │ ├── bike3.png │ ├── bike4.png │ ├── bike5.png │ ├── bike6.png │ ├── bike7.png │ ├── bike8.png │ └── bike9.png │ ├── index.html │ ├── options.js │ ├── script.js │ └── styles.css ├── 06-guided-project ├── create-azure-resources.ps1 ├── 
create-mongodb-vcore-cluster.bicep └── node.js │ ├── .env │ ├── .gitignore │ ├── Blobs │ ├── loadAndVectorize.js │ └── webDownload.js │ ├── Collections │ ├── customers.js │ ├── products.js │ └── salesOrders.js │ ├── SearchComponents │ ├── completion.js │ ├── embeddings.js │ ├── indexes.js │ ├── searches.js │ └── vectorSearch.js │ ├── app.js │ ├── load-and-vectorize-data.js │ ├── package-lock.json │ ├── package.json │ └── public │ ├── images │ ├── bike-header.png │ ├── bike1.png │ ├── bike2.png │ ├── bike3.png │ ├── bike4.png │ ├── bike5.png │ ├── bike6.png │ ├── bike7.png │ ├── bike8.png │ └── bike9.png │ ├── index.html │ ├── options.js │ ├── script.js │ └── styles.css ├── Instructions ├── 00-env-file.md ├── 00-powershell-script.md ├── 01-create-account.md ├── 02-migrate.md ├── 03-manage.md ├── 04-vector-search.md ├── 05-prepare-copilot-application.md ├── 06-create-azure-kubernetes-cluster.md ├── GenericLabSetup.md └── read.me ├── LICENSE ├── _config.yml ├── _layouts ├── default.html ├── home.html └── page.html ├── _sass ├── code.scss └── theme.scss ├── assets ├── Microsoft-logo_rgb_c-wht.png ├── css │ └── style.scss ├── js │ └── script.js └── microsoft-gray.png ├── data └── cosmicworks │ ├── customers.json │ ├── products.json │ └── salesOrders.json └── index.md /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "pymongo" 4 | ] 5 | } -------------------------------------------------------------------------------- /02-migrate/.env: -------------------------------------------------------------------------------- 1 | randomIdentifier= 2 | location= 3 | changeSubscription= 4 | subscriptionName= 5 | skipCreatingResourceGroup= 6 | resourceGroup= 7 | 8 | skipCreatingCosmosDBCluster= 9 | skipCreatingCosmosDBPublicIPFirewallRule= 10 | cosmosCluster= 11 | cosmosClusterLocation= 12 | cosmosDbEndpoint= 13 | cosmosClusterAdmin= 14 | cosmosClusterPassword= 15 | cosmosdbDatabase="cosmicworks" 
-------------------------------------------------------------------------------- /02-migrate/create-mongodb-vcore-cluster.bicep: -------------------------------------------------------------------------------- 1 | 2 | @description('Azure Cosmos DB MongoDB vCore cluster name') 3 | @maxLength(40) 4 | param clusterName string = '' //= 'msdocs-${uniqueString(resourceGroup().id)}' 5 | 6 | @description('Location for the cluster.') 7 | param location string = '' //= resourceGroup().location 8 | 9 | @description('Username for admin user') 10 | param adminUsername string = '' 11 | 12 | @description('Public IP address to allow access to the cluster') 13 | param publicIp string = '0.0.0.0' 14 | 15 | @description('Public IP address rule name for local access to the cluster') 16 | param publicIpRuleName string = 'labMachineIPAccessRule' 17 | 18 | @secure() 19 | @description('Password for admin user') 20 | //@minLength(8) 21 | @maxLength(128) 22 | param adminPassword string = '' 23 | 24 | resource cluster 'Microsoft.DocumentDB/mongoClusters@2023-03-01-preview' = { 25 | name: clusterName 26 | location: location 27 | properties: { 28 | administratorLogin: adminUsername 29 | administratorLoginPassword: adminPassword 30 | nodeGroupSpecs: [ 31 | { 32 | kind: 'Shard' 33 | nodeCount: 1 34 | sku: 'M30' 35 | diskSizeGB: 128 36 | enableHa: false 37 | } 38 | ] 39 | } 40 | } 41 | 42 | 43 | 44 | resource firewallRules 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = { 45 | parent: cluster 46 | name: 'AllowAllAzureServices' 47 | properties: { 48 | startIpAddress: '0.0.0.0' 49 | endIpAddress: '0.0.0.0' 50 | } 51 | } 52 | 53 | resource firewallRules_local_access 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = { 54 | parent: cluster 55 | name: publicIpRuleName 56 | properties: { 57 | startIpAddress: publicIp 58 | endIpAddress: publicIp 59 | } 60 | } 61 | -------------------------------------------------------------------------------- 
/03-manage/.env: -------------------------------------------------------------------------------- 1 | randomIdentifier= 2 | location= 3 | changeSubscription= 4 | subscriptionName= 5 | skipCreatingResourceGroup= 6 | resourceGroup= 7 | 8 | skipCreatingCosmosDBCluster= 9 | skipCreatingCosmosDBPublicIPFirewallRule= 10 | cosmosCluster= 11 | cosmosClusterLocation= 12 | cosmosDbEndpoint= 13 | cosmosClusterAdmin= 14 | cosmosClusterPassword= 15 | cosmosdbDatabase="cosmicworks" 16 | 17 | skipCreatingStorageAccount= 18 | storageAccountName= 19 | storageAccountLocation= 20 | storageAccountSKU= 21 | storageAccountKind= 22 | 23 | skipCreatingLogAnalyticsWorkspace= 24 | logAnalyticsWorkspaceName= 25 | logAnalyticsWorkspaceLocation= -------------------------------------------------------------------------------- /03-manage/create-mongodb-vcore-cluster.bicep: -------------------------------------------------------------------------------- 1 | 2 | @description('Azure Cosmos DB MongoDB vCore cluster name') 3 | @maxLength(40) 4 | param clusterName string = '' //= 'msdocs-${uniqueString(resourceGroup().id)}' 5 | 6 | @description('Location for the cluster.') 7 | param location string = '' //= resourceGroup().location 8 | 9 | @description('Username for admin user') 10 | param adminUsername string = '' 11 | 12 | @description('Public IP address to allow access to the cluster') 13 | param publicIp string = '0.0.0.0' 14 | 15 | @description('Public IP address rule name for local access to the cluster') 16 | param publicIpRuleName string = 'labMachineIPAccessRule' 17 | 18 | @secure() 19 | @description('Password for admin user') 20 | //@minLength(8) 21 | @maxLength(128) 22 | param adminPassword string = '' 23 | 24 | resource cluster 'Microsoft.DocumentDB/mongoClusters@2023-03-01-preview' = { 25 | name: clusterName 26 | location: location 27 | properties: { 28 | administratorLogin: adminUsername 29 | administratorLoginPassword: adminPassword 30 | nodeGroupSpecs: [ 31 | { 32 | kind: 'Shard' 33 | 
nodeCount: 1 34 | sku: 'M40' 35 | diskSizeGB: 128 36 | enableHa: false 37 | } 38 | ] 39 | } 40 | } 41 | 42 | 43 | 44 | resource firewallRules 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = { 45 | parent: cluster 46 | name: 'AllowAllAzureServices' 47 | properties: { 48 | startIpAddress: '0.0.0.0' 49 | endIpAddress: '0.0.0.0' 50 | } 51 | } 52 | 53 | resource firewallRules_local_access 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = { 54 | parent: cluster 55 | name: publicIpRuleName 56 | properties: { 57 | startIpAddress: publicIp 58 | endIpAddress: publicIp 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /03-manage/node.js/Blobs/loadData.js: -------------------------------------------------------------------------------- 1 | // Importing required modules 2 | const fs = require('fs'); 3 | const path = require('path'); 4 | const mongodb = require('mongodb'); 5 | const { MongoClient, updateOne } = require('mongodb'); 6 | 7 | // Function to load the local blob data to MongoDB cluster 8 | async function loadLocalBlobDataToMongoDBCluster(client, dataFolder, cosmosDbMongoDbDatabase, batchSize) { 9 | // Read JSON documents from the data folder 10 | const localBlobsFiles = fs.readdirSync(dataFolder); 11 | 12 | // Loop through each file in the data folder 13 | for (const blobFile of localBlobsFiles) { 14 | let batchNumber = 1; 15 | 16 | // Process only JSON files 17 | if (blobFile.includes('.json')) { 18 | console.log(`\n(${new Date().toISOString()}) ${blobFile}`); 19 | 20 | // Read the content of the file and parse it as JSON 21 | const fileContent = fs.readFileSync(path.join(dataFolder, blobFile), 'utf-8'); 22 | const jsonData = JSON.parse(fileContent); 23 | 24 | const totalNumberOfDocuments = jsonData.length; 25 | 26 | // Process only if there are documents in the JSON file 27 | if (totalNumberOfDocuments >= 0) { 28 | // Get the collection name from the file name 29 | const 
collectionName = blobFile.split(".json")[0]; 30 | 31 | // Get the database and the collection 32 | const db = client.db(cosmosDbMongoDbDatabase); 33 | const collection = db.collection(collectionName); 34 | let currentDocIdx = 0; 35 | 36 | let operations = []; 37 | 38 | // Loop through each document in the JSON file 39 | for (let doc of jsonData) { 40 | currentDocIdx++; 41 | 42 | // Prepare the update operation for the document 43 | operations.push({ 44 | updateOne: { 45 | filter: { "_id": doc["_id"] }, 46 | update: { "$set": doc }, 47 | upsert: true 48 | } 49 | }); 50 | 51 | // Write the operations to the database in batches 52 | if (operations.length === batchSize) { 53 | console.log(`\tWriting collection ${collectionName}, batch size ${batchSize}, batch ${batchNumber}, number of documents processed so far ${currentDocIdx}.`); 54 | await collection.bulkWrite(operations, { ordered: false }); 55 | operations = []; 56 | batchNumber++; 57 | } 58 | } 59 | 60 | // Write any remaining operations to the database 61 | if (operations.length > 0) { 62 | console.log(`\tWriting collection ${collectionName}, batch size ${batchSize}, batch ${batchNumber}, number of documents processed so far ${currentDocIdx}.`); 63 | await collection.bulkWrite(operations, { ordered: false }); 64 | } 65 | 66 | console.log(`(${new Date().toISOString()}) Collection ${collectionName}, total number of documents processed ${currentDocIdx} .\n`); 67 | } 68 | } 69 | } 70 | } 71 | 72 | // Export the function 73 | module.exports.loadLocalBlobDataToMongoDBCluster = loadLocalBlobDataToMongoDBCluster; -------------------------------------------------------------------------------- /03-manage/node.js/Workload/runRandomCRUD.js: -------------------------------------------------------------------------------- 1 | const faker = require('faker'); 2 | const MongoClient = require('mongodb').MongoClient; 3 | 4 | 5 | process.on('message', async (message) => { 6 | if (message.command === 'start') { 7 | const client = 
await MongoClient.connect(message.connectionString); 8 | runCRUDOperation(client, message.database); 9 | } else if (message.command === 'stop') { 10 | process.exit(); 11 | } 12 | }); 13 | 14 | // Function to create a random customer document and insert it into the 'customers' collection 15 | async function createRandomCustomer(db) { 16 | // Generate a fake customer document 17 | const customer = { 18 | type: "customer", 19 | customerId: faker.datatype.uuid(), 20 | firstName: faker.name.firstName(), 21 | lastName: faker.name.lastName(), 22 | emailAddress: faker.internet.email(), 23 | phoneNumber: faker.phone.phoneNumber(), 24 | creationDate: faker.date.past(), 25 | addresses: [ 26 | { 27 | addressLine1: faker.address.streetAddress(), 28 | city: faker.address.city(), 29 | state: faker.address.state(), 30 | country: faker.address.country(), 31 | zipCode: faker.address.zipCode(), 32 | } 33 | ], 34 | password: { 35 | hash: faker.datatype.uuid(), 36 | salt: faker.datatype.uuid(), 37 | }, 38 | salesOrderCount: faker.datatype.number({ min: 1, max: 20 }), 39 | }; 40 | await db.collection('customers').insertOne(customer); 41 | } 42 | 43 | // Function to create a random product document and insert it into the 'products' collection 44 | async function createRandomProduct(db) { 45 | // Generate a fake product document 46 | const product = { 47 | categoryId: faker.datatype.uuid(), 48 | categoryName: faker.company.catchPhrase(), 49 | sku: faker.datatype.uuid(), 50 | name: faker.company.catchPhrase(), 51 | description: faker.lorem.sentence(), 52 | price: faker.datatype.float({ min: 10, max: 1000 }), 53 | tags: [ 54 | { 55 | _id: faker.datatype.uuid(), 56 | name: faker.lorem.word(), 57 | } 58 | ], 59 | }; 60 | await db.collection('products').insertOne(product); 61 | } 62 | 63 | // Function to create a random sales order document and insert it into the 'salesOrders' collection 64 | async function createRandomSalesOrder(db) { 65 | // Generate a fake sales order document 66 | const 
sales_order = { 67 | type: "salesOrder", 68 | customerId: faker.datatype.uuid(), 69 | orderDate: faker.date.past(), 70 | shipDate: faker.date.future(), 71 | details: Array.from({ length: faker.datatype.number({ min: 1, max: 10 }) }, () => ({ 72 | sku: faker.datatype.uuid(), 73 | name: faker.company.catchPhrase(), 74 | price: faker.datatype.float({ min: 10, max: 1000 }), 75 | quantity: faker.datatype.number({ min: 1, max: 10 }), 76 | })), 77 | }; 78 | await db.collection('salesOrders').insertOne(sales_order); 79 | } 80 | 81 | // Function to run a random CRUD operation on a random collection 82 | async function runRandomCRUDOperation(client, cosmos_db_mongodb_database) { 83 | const db = client.db(cosmos_db_mongodb_database); 84 | 85 | // Create threads for performing CRUD operations on random collections 86 | const collections = ['customers', 'products', 'salesOrders']; 87 | const num_collections = faker.datatype.number({ min: 1, max: collections.length }); // Random number of collections 88 | const selectedCollections = faker.helpers.shuffle(collections).slice(0, num_collections); 89 | 90 | for (const collection of selectedCollections) { 91 | await performRandomCRUDOnCollection(db, collection); 92 | } 93 | 94 | // Wait for a random time between 1 and 5 seconds 95 | await new Promise(resolve => setTimeout(resolve, faker.datatype.float({ min: 1000, max: 5000 }))); 96 | } 97 | 98 | // Function to perform a random CRUD operation on a collection 99 | async function performRandomCRUDOnCollection(db, collection) { 100 | const operation = faker.random.arrayElement(['create', 'read', 'update', 'delete']); 101 | 102 | // Perform the chosen CRUD operation 103 | if (operation === 'create') { 104 | if (collection === 'customers') { 105 | await createRandomCustomer(db); 106 | } else if (collection === 'products') { 107 | await createRandomProduct(db); 108 | } else if (collection === 'salesOrders') { 109 | await createRandomSalesOrder(db); 110 | } 111 | } else if (operation === 
'read') { 112 | await db.collection(collection).findOne(); 113 | } else if (operation === 'update') { 114 | const document = await db.collection(collection).findOne(); 115 | if (document) { 116 | await db.collection(collection).updateOne({ _id: document._id }, { $set: { name: faker.company.catchPhrase() } }); 117 | } 118 | } else if (operation === 'delete') { 119 | const document = await db.collection(collection).findOne(); 120 | if (document) { 121 | await db.collection(collection).deleteOne({ _id: document._id }); 122 | } 123 | } 124 | } 125 | 126 | // Function to continuously run CRUD operations until the user presses the 'esc' key or 'q' 127 | async function runCRUDOperation(client, cosmos_db_mongodb_database) { 128 | console.log("Starting CRUD operations. Press 'q' to stop."); 129 | 130 | // Continuously run CRUD operations until the stop event is set 131 | while (true) { 132 | await runRandomCRUDOperation(client, cosmos_db_mongodb_database); 133 | } 134 | 135 | console.log("CRUD operations stopped."); 136 | } 137 | 138 | // Export the functions 139 | module.exports.runCRUDOperation = runCRUDOperation; -------------------------------------------------------------------------------- /03-manage/node.js/load-data-run-workload.js: -------------------------------------------------------------------------------- 1 | // Import necessary modules 2 | const LoadData = require('./Blobs/loadData'); 3 | const runRandomCRUD = require('./Workload/runRandomCRUD'); 4 | const dotenv = require('dotenv'); 5 | const MongoClient = require('mongodb').MongoClient; 6 | 7 | const readlineSync = require('readline-sync'); 8 | const { fork } = require('child_process'); 9 | 10 | // Load environment variables 11 | dotenv.config({ path: '../.env' }); 12 | 13 | let client; 14 | 15 | async function main() { 16 | // Define constants 17 | const data_folder = "../../data/cosmicworks/"; 18 | const batch_size = 1000; 19 | 20 | try { 21 | // Get Configuration Settings from environment variables 22 | 
let cosmosdb_connection_string = process.env.cosmosDbEndpoint; 23 | const cosmos_db_mongodb_database = process.env.cosmosdbDatabase; 24 | const cosmos_mongo_user = process.env.cosmosClusterAdmin; 25 | const cosmos_mongo_pwd = process.env.cosmosClusterPassword; 26 | 27 | // Replace placeholders in the connection string with actual values 28 | cosmosdb_connection_string = cosmosdb_connection_string.replace("", encodeURIComponent(cosmos_mongo_user)); 29 | cosmosdb_connection_string = cosmosdb_connection_string.replace("", encodeURIComponent(cosmos_mongo_pwd)); 30 | 31 | // Connect to MongoDB server 32 | const client = new MongoClient(cosmosdb_connection_string); 33 | await client.connect(); 34 | 35 | let userInput = ""; 36 | while (userInput !== "0") { 37 | console.clear(); 38 | console.log("Please select an option:"); 39 | console.log("\t1. Load local data into MongoDB and create vector index."); 40 | console.log("\t2. Run workload on Database."); 41 | console.log("\t0. End"); 42 | userInput = readlineSync.keyIn("Option: "); 43 | 44 | // Handle user input 45 | if (userInput === "0") { 46 | process.exit(0); 47 | } else if (!["1", "2"].includes(userInput)) { 48 | console.log("Invalid option. 
Please try again."); 49 | continue; 50 | } 51 | 52 | console.log(`\nYou selected option ${userInput}.\n`); 53 | 54 | // Load data into MongoDB and create vector index if user selected option 1 or 2 55 | if (userInput === "1") { 56 | await LoadData.loadLocalBlobDataToMongoDBCluster(client, data_folder, cosmos_db_mongodb_database, batch_size); 57 | } 58 | 59 | // Run a vector search if user selected option 3 60 | if (userInput === "2") { 61 | const child = fork('./Workload/runRandomCRUD.js'); 62 | 63 | child.send({ 64 | command: 'start', 65 | connectionString: cosmosdb_connection_string, // Pass the connection string, not the client 66 | database: cosmos_db_mongodb_database 67 | }); 68 | 69 | // Listen for key presses in the main process 70 | readLineInput = readlineSync.keyIn('', {hideEchoBack: true, mask: '', limit: 'qesc'}) 71 | if (readLineInput) { 72 | // If 'q' or 'esc' is pressed, send a message to the child process to stop 73 | child.send({ command: 'stop' }); 74 | } 75 | } 76 | 77 | console.log("\nPress Enter to continue..."); 78 | readlineSync.question(""); 79 | } 80 | } catch (ex) { 81 | // Log any errors 82 | console.error(ex); 83 | } finally { 84 | if (client) { 85 | await client.close(); 86 | } 87 | } 88 | } 89 | 90 | // Run the main function 91 | main(); -------------------------------------------------------------------------------- /03-manage/node.js/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "load-data-run-workload", 3 | "version": "1.0.0", 4 | "description": "Create a Node.js app that loads data into an Azure Cosmos DB MongoDB database and runs a workload against that data.", 5 | "main": "load-data-run-workload.js", 6 | "scripts": { 7 | "start": "node load-data-run-workload.js" 8 | }, 9 | "dependencies": { 10 | "@azure/storage-blob": "12.17.0", 11 | "async-retry": "1.3.3", 12 | "dotenv": "16.4.5", 13 | "faker": "5.5.3", 14 | "mongodb": "6.4.0", 15 | "readline-sync": "^1.4.10", 16 
| "child_process": "^1.0.2" 17 | } 18 | } -------------------------------------------------------------------------------- /03-manage/python/Blobs/loadData.py: -------------------------------------------------------------------------------- 1 | # Import necessary modules 2 | import os 3 | import json 4 | import datetime 5 | from pymongo import UpdateOne 6 | 7 | # Function to load local blob data to MongoDB cluster 8 | def loadLocalBlobDataToMongoDBCluster(client, data_folder,cosmos_db_mongodb_database,batch_size): 9 | # Get list of files in the data folder 10 | local_blobs_files = os.listdir(data_folder) 11 | 12 | # Iterate over each file in the folder 13 | for blob_file in local_blobs_files: 14 | batch_number = 1 15 | 16 | # Process only JSON files 17 | if blob_file.find(".json") >= 0: 18 | print("\n(" + str(datetime.datetime.now()) + ") " + blob_file) 19 | 20 | # Open the file and load its content 21 | with open(data_folder+blob_file, 'r') as file: 22 | file_content = file.read() 23 | json_data = json.loads(file_content) 24 | 25 | # Get the total number of documents in the file 26 | total_number_of_documents = len(json_data) 27 | 28 | if total_number_of_documents >= 0: 29 | # Get the collection name from the file name 30 | collection_name = blob_file[:blob_file.find(".json")] 31 | 32 | # Get the database and collection 33 | db = client[cosmos_db_mongodb_database] 34 | collection = db[collection_name] 35 | current_doc_idx = 0 36 | 37 | operations = [] 38 | 39 | # Iterate over each document in the JSON data 40 | for doc in json_data: 41 | current_doc_idx = current_doc_idx + 1 42 | 43 | # Prepare the update operation for the document 44 | operations.append(UpdateOne({"_id": doc["_id"]},{"$set": doc}, upsert=True)) 45 | 46 | # Write to the collection in batches 47 | if (len(operations) == batch_size): 48 | print(f"\tWriting collection {collection_name}, batch size {batch_size}, batch {batch_number}, number of documents processed so far {current_doc_idx}.") 49 | 
collection.bulk_write(operations,ordered=False) 50 | operations = [] 51 | batch_number = batch_number + 1 52 | 53 | # Write any remaining operations to the collection 54 | if (len(operations) > 0): 55 | print(f"\tWriting collection {collection_name}, batch size {batch_size}, batch {batch_number}, number of documents processed so far {current_doc_idx}.") 56 | collection.bulk_write(operations,ordered=False) 57 | 58 | print(f"(" + str(datetime.datetime.now()) + ") " + f"Collection {collection_name}, total number of documents processed {current_doc_idx} .\n") 59 | -------------------------------------------------------------------------------- /03-manage/python/Workload/runRandomCRUD.py: -------------------------------------------------------------------------------- 1 | import pymongo 2 | import random 3 | import time 4 | import threading 5 | from faker import Faker 6 | import keyboard 7 | 8 | fake = Faker() # Create a Faker instance for generating fake data 9 | 10 | # Function to create a random customer document and insert it into the 'customers' collection 11 | def createRandomCustomer(db): 12 | # Generate a fake customer document 13 | customer = { 14 | "type": "customer", 15 | "customerId": fake.uuid4(), 16 | "firstName": fake.first_name(), 17 | "lastName": fake.last_name(), 18 | "emailAddress": fake.email(), 19 | "phoneNumber": fake.phone_number(), 20 | "creationDate": fake.date_time_this_decade(), 21 | "addresses": [ 22 | { 23 | "addressLine1": fake.street_address(), 24 | "city": fake.city(), 25 | "state": fake.state(), 26 | "country": fake.country(), 27 | "zipCode": fake.zipcode(), 28 | } 29 | ], 30 | "password": { 31 | "hash": fake.sha256(), 32 | "salt": fake.sha1(), 33 | }, 34 | "salesOrderCount": random.randint(1, 20), 35 | } 36 | db.customers.insert_one(customer) 37 | 38 | # Function to create a random product document and insert it into the 'products' collection 39 | def createRandomProduct(db): 40 | # Generate a fake product document 41 | product = { 42 | 
"categoryId": fake.uuid4(), 43 | "categoryName": fake.catch_phrase(), 44 | "sku": fake.bothify(text='??-####', letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ'), 45 | "name": fake.catch_phrase(), 46 | "description": fake.sentence(), 47 | "price": random.uniform(10, 1000), 48 | "tags": [ 49 | { 50 | "_id": fake.uuid4(), 51 | "name": fake.word(), 52 | } 53 | ], 54 | } 55 | db.products.insert_one(product) 56 | 57 | # Function to create a random sales order document and insert it into the 'salesOrders' collection 58 | def createRandomSalesOrder(db): 59 | # Generate a fake sales order document 60 | sales_order = { 61 | "type": "salesOrder", 62 | "customerId": fake.uuid4(), 63 | "orderDate": fake.date_time_this_decade(), 64 | "shipDate": fake.date_time_this_decade(), 65 | "details": [ 66 | { 67 | "sku": fake.bothify(text='??-####', letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ'), 68 | "name": fake.catch_phrase(), 69 | "price": random.uniform(10, 1000), 70 | "quantity": random.randint(1, 10), 71 | } for _ in range(random.randint(1, 10)) 72 | ], 73 | } 74 | db.salesOrders.insert_one(sales_order) 75 | 76 | # Function to run a random CRUD operation on a random collection 77 | def runRandomCRUDOperation(client, cosmos_db_mongodb_database): 78 | db = client[cosmos_db_mongodb_database] 79 | 80 | # Create threads for performing CRUD operations on random collections 81 | threads = [] 82 | collections = ['customers', 'products', 'salesOrders'] 83 | num_collections = random.randint(1, len(collections)) # Random number of collections 84 | for collection in random.sample(collections, num_collections): 85 | t = threading.Thread(target=performRandomCRUDOnCollection, args=(db, collection,)) 86 | threads.append(t) 87 | t.start() 88 | 89 | # Wait for all threads to complete 90 | for t in threads: 91 | t.join() 92 | 93 | # Wait for a random time between 1 and 5 seconds 94 | time.sleep(random.uniform(1, 5)) 95 | 96 | # Function to perform a random CRUD operation on a collection 97 | def 
performRandomCRUDOnCollection(db, collection): 98 | operation = random.choice(['create', 'read', 'update', 'delete']) 99 | 100 | # Perform the chosen CRUD operation 101 | if operation == 'create': 102 | if collection == 'customers': 103 | createRandomCustomer(db) 104 | elif collection == 'products': 105 | createRandomProduct(db) 106 | elif collection == 'salesOrders': 107 | createRandomSalesOrder(db) 108 | elif operation == 'read': 109 | document = db[collection].find_one() 110 | elif operation == 'update': 111 | document = db[collection].find_one() 112 | if document: 113 | db[collection].update_one({'_id': document['_id']}, {'$set': {'name': fake.catch_phrase()}}) 114 | elif operation == 'delete': 115 | document = db[collection].find_one() 116 | if document: 117 | db[collection].delete_one({'_id': document['_id']}) 118 | 119 | # Function to continuously run CRUD operations until the user presses the 'esc' key or 'q' 120 | def runCRUDOperation(client, cosmos_db_mongodb_database): 121 | print("Starting CRUD operations. 
Press 'q' or 'esc' to stop.") 122 | stop_event = threading.Event() 123 | 124 | def on_press(e): 125 | if e.name == 'q' or e.name == 'esc': 126 | stop_event.set() # Set the stop event 127 | return False # Stop listener 128 | 129 | # Start the key press listener 130 | keyboard.on_press(on_press) 131 | 132 | # Continuously run CRUD operations until the stop event is set 133 | while not stop_event.is_set(): 134 | runRandomCRUDOperation(client, cosmos_db_mongodb_database) 135 | 136 | print("CRUD operations stopped.") -------------------------------------------------------------------------------- /03-manage/python/load-data-run-workload.py: -------------------------------------------------------------------------------- 1 | # Import necessary modules and functions 2 | import Blobs.loadData as loadData 3 | import Workload.runRandomCRUD as runRandomCRUD 4 | 5 | import os 6 | import os.path 7 | import urllib 8 | 9 | from dotenv import load_dotenv 10 | 11 | import pymongo 12 | 13 | def main(): 14 | # Define variables 15 | data_folder = "../../data/cosmicworks/" 16 | batch_size = 1000 17 | 18 | try: 19 | # Load environment variables from .env file 20 | load_dotenv("../.env") 21 | cosmosdb_connection_string = os.getenv('cosmosDbEndpoint') 22 | 23 | cosmos_db_mongodb_database = os.getenv('cosmosdbDatabase') 24 | 25 | cosmos_mongo_user = os.getenv('cosmosClusterAdmin') 26 | cosmos_mongo_pwd = os.getenv('cosmosClusterPassword') 27 | 28 | # Replace placeholders in the connection string with actual values 29 | cosmosdb_connection_string = cosmosdb_connection_string.replace("", urllib.parse.quote_plus(cosmos_mongo_user)) 30 | cosmosdb_connection_string = cosmosdb_connection_string.replace("", urllib.parse.quote_plus(cosmos_mongo_pwd)) 31 | 32 | # Connect to MongoDB server 33 | client = pymongo.MongoClient(cosmosdb_connection_string) 34 | 35 | # User interaction loop 36 | user_input = "" 37 | while user_input.lower() != "0": 38 | os.system('cls' if os.name == 'nt' else 'clear') 39 | 
print("Please select an option:") 40 | print("\t1. Load local data into MongoDB.") 41 | print("\t2. Run workload on Database.") 42 | print("\t0. End") 43 | user_input = input("Option: ") 44 | 45 | # Handle user input 46 | if user_input == "0": 47 | break 48 | elif user_input not in ["1", "2"]: 49 | print("Invalid option. Please try again.") 50 | continue 51 | 52 | # Load data into MongoDB and create vector index 53 | if user_input == "1": 54 | loadData.loadLocalBlobDataToMongoDBCluster(client, data_folder,cosmos_db_mongodb_database,batch_size) 55 | 56 | # Run a vector search 57 | if user_input == "2": 58 | runRandomCRUD.runCRUDOperation(client, cosmos_db_mongodb_database) 59 | 60 | print("\nPress Enter to continue...") 61 | input() 62 | 63 | except Exception as ex: 64 | print(ex) 65 | 66 | # Run the main function if the script is run as a standalone program 67 | if __name__ == "__main__": 68 | main() -------------------------------------------------------------------------------- /04-vector-search/.env: -------------------------------------------------------------------------------- 1 | randomIdentifier= 2 | location= 3 | changeSubscription= 4 | subscriptionName= 5 | skipCreatingResourceGroup= 6 | resourceGroup= 7 | 8 | skipCreatingCosmosDBCluster= 9 | skipCreatingCosmosDBPublicIPFirewallRule= 10 | cosmosCluster= 11 | cosmosClusterLocation= 12 | cosmosDbEndpoint= 13 | cosmosClusterAdmin= 14 | cosmosClusterPassword= 15 | cosmosdbDatabase="cosmicworks" 16 | 17 | skipCreatingAzureOpenAIAccount= 18 | cognitiveServicesKind="OpenAI" 19 | OpenAIAccount= 20 | OpenAIAccountLocation= 21 | OpenAIAccountSKU="s0" 22 | OpenAIEndpoint= 23 | OpenAIKey1= 24 | OpenAIVersion="2023-05-15" 25 | 26 | skipCreatingAzureOpenAIDeployment= 27 | OpenAIDeploymentName= 28 | OpenAIDeploymentModel="text-embedding-ada-002" 29 | OpenAIDeploymentModelFormat="OpenAI" 30 | OpenAIDeploymentModelVersion="2" 31 | OpenAIDeploymentSKU="Standard" 32 | OpenAIDeploymentSKUCapacity=100 33 | 34 | 
skipCreatingAzureOpenAICompletionDeployment= 35 | OpenAICompletionDeploymentName= 36 | OpenAICompletionDeploymentModel="gpt-35-turbo" 37 | OpenAICompletionDeploymentModelFormat="OpenAI" 38 | OpenAICompletionDeploymentModelVersion="0301" 39 | OpenAICompletionDeploymentSKU="Standard" 40 | OpenAICompletionDeploymentSKUCapacity=100 -------------------------------------------------------------------------------- /04-vector-search/create-mongodb-vcore-cluster.bicep: -------------------------------------------------------------------------------- 1 | 2 | @description('Azure Cosmos DB MongoDB vCore cluster name') 3 | @maxLength(40) 4 | param clusterName string = '' //= 'msdocs-${uniqueString(resourceGroup().id)}' 5 | 6 | @description('Location for the cluster.') 7 | param location string = '' //= resourceGroup().location 8 | 9 | @description('Username for admin user') 10 | param adminUsername string = '' 11 | 12 | @description('Public IP address to allow access to the cluster') 13 | param publicIp string = '0.0.0.0' 14 | 15 | @description('Public IP address rule name for local access to the cluster') 16 | param publicIpRuleName string = 'labMachineIPAccessRule' 17 | 18 | @secure() 19 | @description('Password for admin user') 20 | //@minLength(8) 21 | @maxLength(128) 22 | param adminPassword string = '' 23 | 24 | resource cluster 'Microsoft.DocumentDB/mongoClusters@2023-03-01-preview' = { 25 | name: clusterName 26 | location: location 27 | properties: { 28 | administratorLogin: adminUsername 29 | administratorLoginPassword: adminPassword 30 | nodeGroupSpecs: [ 31 | { 32 | kind: 'Shard' 33 | nodeCount: 1 34 | sku: 'M30' 35 | diskSizeGB: 128 36 | enableHa: false 37 | } 38 | ] 39 | } 40 | } 41 | 42 | 43 | 44 | resource firewallRules 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = { 45 | parent: cluster 46 | name: 'AllowAllAzureServices' 47 | properties: { 48 | startIpAddress: '0.0.0.0' 49 | endIpAddress: '0.0.0.0' 50 | } 51 | } 52 | 53 | resource 
firewallRules_local_access 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = { 54 | parent: cluster 55 | name: publicIpRuleName 56 | properties: { 57 | startIpAddress: publicIp 58 | endIpAddress: publicIp 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /04-vector-search/node.js/Blobs/loadAndVectorize.js: -------------------------------------------------------------------------------- 1 | // Importing required modules 2 | const fs = require('fs'); 3 | const path = require('path'); 4 | const mongodb = require('mongodb'); 5 | const { MongoClient, updateOne } = require('mongodb'); 6 | 7 | // Importing custom modules 8 | const Customers = require('../Collections/customers'); 9 | const Products = require('../Collections/products'); 10 | const SalesOrders = require('../Collections/salesOrders'); 11 | const Indexes = require('../SearchComponents/indexes'); 12 | 13 | // Function to load and vectorize local blob data to MongoDB cluster 14 | async function loadAndVectorizeLocalBlobDataToMongoDBCluster(client, dataFolder, cosmosDbMongoDbDatabase, batchSize, embeddingsDeployment, AzureOpenAIClient, processCustomersVector, processProductsVector, processSalesOrdersVector) { 15 | // Read JSON documents from the data folder 16 | const localBlobsFiles = fs.readdirSync(dataFolder); 17 | 18 | // Loop through each file in the data folder 19 | for (const blobFile of localBlobsFiles) { 20 | let batchNumber = 1; 21 | 22 | // Process only JSON files 23 | if (blobFile.includes('.json')) { 24 | console.log(`\n(${new Date().toISOString()}) ${blobFile}`); 25 | 26 | // Read the content of the file and parse it as JSON 27 | const fileContent = fs.readFileSync(path.join(dataFolder, blobFile), 'utf-8'); 28 | const jsonData = JSON.parse(fileContent); 29 | 30 | const totalNumberOfDocuments = jsonData.length; 31 | 32 | // Process only if there are documents in the JSON file 33 | if (totalNumberOfDocuments >= 0) { 34 | // Get 
the collection name from the file name 35 | const collectionName = blobFile.split(".json")[0]; 36 | 37 | // Get the database and the collection 38 | const db = client.db(cosmosDbMongoDbDatabase); 39 | const collection = db.collection(collectionName); 40 | let currentDocIdx = 0; 41 | 42 | let operations = []; 43 | 44 | let indexList = []; 45 | 46 | // Loop through each document in the JSON file 47 | for (let doc of jsonData) { 48 | currentDocIdx++; 49 | 50 | // Generate embeddings for the document based on the collection name 51 | if (collectionName === "customers" && processCustomersVector) { 52 | doc = await Customers.generateCustomerEmbedding(doc, embeddingsDeployment, AzureOpenAIClient); 53 | } else if (collectionName === "products" && processProductsVector) { 54 | doc = await Products.generateProductEmbedding(doc, embeddingsDeployment, AzureOpenAIClient); 55 | } else if (collectionName === "salesOrders" && processSalesOrdersVector) { 56 | doc = await SalesOrders.generateSalesOrderEmbedding(doc, embeddingsDeployment, AzureOpenAIClient); 57 | } 58 | 59 | // Log the progress for every 100 documents processed 60 | if (currentDocIdx % 100 === 0 && ((processCustomersVector && collectionName === "customers") || (processProductsVector && collectionName === "products") || (processSalesOrdersVector && collectionName === "salesOrders"))) { 61 | console.log(`\t${currentDocIdx} out of ${totalNumberOfDocuments} docs vectorized.`); 62 | } 63 | 64 | // Prepare the update operation for the document 65 | operations.push({ 66 | updateOne: { 67 | filter: { "_id": doc["_id"] }, 68 | update: { "$set": doc }, 69 | upsert: true 70 | } 71 | }); 72 | 73 | // Write the operations to the database in batches 74 | if (operations.length === batchSize) { 75 | console.log(`\tWriting collection ${collectionName}, batch size ${batchSize}, batch ${batchNumber}, number of documents processed so far ${currentDocIdx}.`); 76 | await collection.bulkWrite(operations, { ordered: false }); 77 | 
operations = []; 78 | batchNumber++; 79 | } 80 | } 81 | 82 | // Log the completion of vectorization 83 | if ((processCustomersVector && collectionName === "customers") || (processProductsVector && collectionName === "products") || (processSalesOrdersVector && collectionName === "salesOrders")) { 84 | console.log(`\t${totalNumberOfDocuments} out of ${totalNumberOfDocuments} docs vectorized.`); 85 | } 86 | 87 | // Write any remaining operations to the database 88 | if (operations.length > 0) { 89 | console.log(`\tWriting collection ${collectionName}, batch size ${batchSize}, batch ${batchNumber}, number of documents processed so far ${currentDocIdx}.`); 90 | await collection.bulkWrite(operations, { ordered: false }); 91 | } 92 | 93 | console.log(`(${new Date().toISOString()}) Collection ${collectionName}, total number of documents processed ${currentDocIdx} .\n`); 94 | 95 | // Replace this line with the lab's code 96 | 97 | } 98 | } 99 | } 100 | } 101 | 102 | // Export the function 103 | module.exports.loadAndVectorizeLocalBlobDataToMongoDBCluster = loadAndVectorizeLocalBlobDataToMongoDBCluster; -------------------------------------------------------------------------------- /04-vector-search/node.js/Blobs/webDownload.js: -------------------------------------------------------------------------------- 1 | // Import the BlobServiceClient from the Azure Storage Blob package 2 | const { BlobServiceClient } = require('@azure/storage-blob'); 3 | 4 | // Import the file system module 5 | const fs = require('fs'); 6 | 7 | // Import the path module 8 | const path = require('path'); 9 | 10 | // Define an asynchronous function to download files from Azure Blob Storage if they don't exist locally 11 | async function downloadFilesFromBlobIfTheyDontExist(accountUrl, containerName, dataFolder) { 12 | // Create a new BlobServiceClient 13 | const blobServiceClient = new BlobServiceClient(accountUrl); 14 | 15 | // Get a ContainerClient for the specified container 16 | const 
containerClient = blobServiceClient.getContainerClient(containerName); 17 | 18 | // List all blobs in the container 19 | let blobs = containerClient.listBlobsFlat(); 20 | 21 | // Iterate over each blob 22 | for await (const blob of blobs) { 23 | // Construct the local file path 24 | const filePath = path.join(dataFolder, blob.name); 25 | 26 | // Check if the file already exists locally 27 | if (!fs.existsSync(filePath)) { 28 | // If the file doesn't exist locally, download it from Azure Blob Storage 29 | 30 | // Get a BlobClient for the blob 31 | const blobClient = containerClient.getBlobClient(blob.name); 32 | 33 | // Download the blob 34 | const downloadBlockBlobResponse = await blobClient.download(0); 35 | 36 | // Create a write stream for the local file 37 | const fileStream = fs.createWriteStream(filePath); 38 | 39 | // Pipe the downloaded blob to the file stream 40 | downloadBlockBlobResponse.readableStreamBody.pipe(fileStream); 41 | } 42 | } 43 | } 44 | 45 | // Export the function 46 | module.exports.downloadFilesFromBlobIfTheyDontExist = downloadFilesFromBlobIfTheyDontExist; -------------------------------------------------------------------------------- /04-vector-search/node.js/Collections/customers.js: -------------------------------------------------------------------------------- 1 | // Import the Embeddings module from the SearchComponents directory 2 | const Embeddings = require('../SearchComponents/embeddings'); 3 | 4 | // Function to convert a list of address objects into a single string 5 | function getCustomerAddresses(addresses) { 6 | let addressesString = ""; 7 | 8 | // Iterate over each address in the list 9 | for (let idx = 0; idx < addresses.length; idx++) { 10 | const address = addresses[idx]; 11 | // Concatenate each address field into the addressesString 12 | addressesString += (idx > 0 ? "; " : "") + 13 | (address.addressLine1 ? "Address Line - " + address.addressLine1 : "") + 14 | (address.addressLine2 ? 
" " + address.addressLine2 : "") + 15 | (address.city ? ", city - " + address.city : "") + 16 | (address.state ? ", state - " + address.state : "") + 17 | (address.country ? ", country - " + address.country : "") + 18 | (address.zipCode ? ", zipcode - " + address.zipCode : "") + 19 | (address.location ? ", location - " + address.location : ""); 20 | } 21 | 22 | // Return the concatenated string of addresses 23 | return addressesString; 24 | } 25 | 26 | // Asynchronous function to generate embeddings for various customer fields 27 | async function generateCustomerEmbedding(customer, embeddingsDeployment, AzureOpenAIClient) { 28 | // If the customer has a type, generate an embedding for it 29 | if (customer.type) { 30 | customer.customerTypeVector = await Embeddings.generateEmbeddings(customer.type, embeddingsDeployment, AzureOpenAIClient); 31 | } 32 | 33 | // If the customer has a title, generate an embedding for it 34 | if (customer.title) { 35 | customer.customerTitleVector = await Embeddings.generateEmbeddings(customer.title, embeddingsDeployment, AzureOpenAIClient); 36 | } 37 | 38 | // If the customer has a first and last name, generate an embedding for it 39 | if (customer.firstName && customer.lastName) { 40 | customer.customerNameVector = await Embeddings.generateEmbeddings(customer.firstName + " " + customer.lastName, embeddingsDeployment, AzureOpenAIClient); 41 | } 42 | 43 | // If the customer has an email address, generate an embedding for it 44 | if (customer.emailAddress) { 45 | customer.customerEmailAddressVector = await Embeddings.generateEmbeddings(customer.emailAddress, embeddingsDeployment, AzureOpenAIClient); 46 | } 47 | 48 | // If the customer has a phone number, generate an embedding for it 49 | if (customer.phoneNumber) { 50 | customer.customerPhoneNumberVector = await Embeddings.generateEmbeddings(customer.phoneNumber, embeddingsDeployment, AzureOpenAIClient); 51 | } 52 | 53 | // Get the string representation of the customer's addresses 54 | 
const address = getCustomerAddresses(customer.addresses); 55 | // If the customer has addresses, generate an embedding for them 56 | if (address.length > 0) { 57 | customer.customerAddressesVector = await Embeddings.generateEmbeddings(address, embeddingsDeployment, AzureOpenAIClient); 58 | } 59 | 60 | // Return the customer object with the added embeddings 61 | return customer; 62 | } 63 | 64 | // Export the generateCustomerEmbedding function 65 | module.exports.generateCustomerEmbedding = generateCustomerEmbedding; -------------------------------------------------------------------------------- /04-vector-search/node.js/Collections/products.js: -------------------------------------------------------------------------------- 1 | // Import the Embeddings module from the SearchComponents directory 2 | const Embeddings = require('../SearchComponents/embeddings'); 3 | 4 | // Asynchronous function to generate an embedding for a product 5 | async function generateProductEmbedding(product, embeddingsDeployment, AzureOpenAIClient) { 6 | 7 | // Replace this line with the lab's code 8 | 9 | } 10 | 11 | // Export the generateProductEmbedding function 12 | module.exports.generateProductEmbedding = generateProductEmbedding; -------------------------------------------------------------------------------- /04-vector-search/node.js/Collections/salesOrders.js: -------------------------------------------------------------------------------- 1 | // Import the Embeddings module from the SearchComponents directory 2 | const Embeddings = require('../SearchComponents/embeddings'); 3 | 4 | // Function to convert a list of sales order details into a single string 5 | function getSalesOrderDetails(details) { 6 | let detailsString = ""; 7 | 8 | // Iterate over each detail in the list 9 | for (let idx = 0; idx < details.length; idx++) { 10 | const detail = details[idx]; 11 | // Concatenate each detail's SKU and name into the detailsString 12 | detailsString += (idx > 0 ? 
"; " : "") + 13 | (detail.sku ? detail.sku : "") + 14 | (detail.name ? "," + detail.name : ""); 15 | } 16 | 17 | // Return the concatenated string of details 18 | return detailsString; 19 | } 20 | 21 | // Asynchronous function to generate an embedding for a sales order 22 | async function generateSalesOrderEmbedding(salesOrder, embeddingsDeployment, AzureOpenAIClient) { 23 | // Get the string representation of the sales order's details 24 | const detail = getSalesOrderDetails(salesOrder.details); 25 | // If the detail string has content, generate an embedding for it 26 | if (detail.length > 0) { 27 | // The embedding is generated using the Embeddings module's generateEmbeddings function 28 | // The resulting embedding is stored in the salesOrder object under the key "salesOrderDetailVector" 29 | salesOrder.salesOrderDetailVector = await Embeddings.generateEmbeddings(detail, embeddingsDeployment, AzureOpenAIClient); 30 | } 31 | 32 | // Return the salesOrder object with the added embedding 33 | return salesOrder; 34 | } 35 | 36 | // Export the generateSalesOrderEmbedding function 37 | module.exports.generateSalesOrderEmbedding = generateSalesOrderEmbedding; -------------------------------------------------------------------------------- /04-vector-search/node.js/SearchComponents/completion.js: -------------------------------------------------------------------------------- 1 | // Asynchronous function to generate a completion for a given prompt 2 | const generateCompletion = async (prompt, completionDeployment, AzureOpenAICompletionClient, userInput) => { 3 | 4 | // Replace this line with the lab's code 5 | 6 | } 7 | 8 | // Export the generateCompletion function 9 | module.exports.generateCompletion = generateCompletion; -------------------------------------------------------------------------------- /04-vector-search/node.js/SearchComponents/embeddings.js: -------------------------------------------------------------------------------- 1 | // Import the async-retry 
module 2 | const retry = require('async-retry'); 3 | 4 | // Asynchronous function to generate embeddings from a string of text 5 | async function generateEmbeddings(text, embeddingsDeployment, AzureOpenAIClient) { 6 | 7 | // Replace this line with the lab's code 8 | 9 | } 10 | 11 | // Export the generateEmbeddings function 12 | module.exports.generateEmbeddings = generateEmbeddings; -------------------------------------------------------------------------------- /04-vector-search/node.js/SearchComponents/indexes.js: -------------------------------------------------------------------------------- 1 | // Asynchronous function to create vector indexes in a MongoDB collection 2 | async function createVectorIndexes(collection, indexList, db, collectionName) { 3 | 4 | // Replace this line with the lab's code 5 | 6 | } 7 | 8 | // Export the createVectorIndexes function 9 | module.exports.createVectorIndexes = createVectorIndexes; -------------------------------------------------------------------------------- /04-vector-search/node.js/SearchComponents/searches.js: -------------------------------------------------------------------------------- 1 | // Import the required modules 2 | const VectorSearch = require('../SearchComponents/vectorSearch'); 3 | const Completion = require('../SearchComponents/completion'); 4 | 5 | // Asynchronous function to run a vector search 6 | async function runVectorSearch(embeddingsDeployment, AzureOpenAIClient, client, cosmosDbMongodbDatabase, rl) { 7 | 8 | // Replace this line with the lab's code 9 | 10 | } 11 | 12 | // Asynchronous function to run a GPT-3 search 13 | async function runGPTSearch(embeddingsDeployment, AzureOpenAIClient, completionDeployment, client, cosmosDbMongodbDatabase, rl) { 14 | 15 | // Replace this line with the lab's code 16 | 17 | } 18 | 19 | // Export the runVectorSearch and runGPTSearch functions 20 | module.exports.runVectorSearch = runVectorSearch; 21 | module.exports.runGPTSearch = runGPTSearch; 
-------------------------------------------------------------------------------- /04-vector-search/node.js/SearchComponents/vectorSearch.js: -------------------------------------------------------------------------------- 1 | // Import the Embeddings module 2 | const Embeddings = require('../SearchComponents/embeddings'); 3 | 4 | // Asynchronous function to perform a vector search 5 | async function vectorSearch(query, vectorColumn, collection, embeddingsDeployment, AzureOpenAIClient, numResults = 3) { 6 | 7 | // Replace this line with the lab's code 8 | 9 | } 10 | 11 | // Export the vectorSearch function 12 | module.exports.vectorSearch = vectorSearch; -------------------------------------------------------------------------------- /04-vector-search/node.js/load-and-vectorize-data.js: -------------------------------------------------------------------------------- 1 | // Import necessary modules 2 | const Searches = require('./SearchComponents/searches'); 3 | const WebDownload = require('./Blobs/webDownload'); 4 | const LoadAndVectorize = require('./Blobs/loadAndVectorize'); 5 | const readline = require('readline'); 6 | const rl = readline.createInterface({ 7 | input: process.stdin, 8 | output: process.stdout 9 | }); 10 | const dotenv = require('dotenv'); 11 | const MongoClient = require('mongodb').MongoClient; 12 | const { AzureOpenAI } = require("openai"); 13 | const apiVersion = "2024-07-01-preview"; 14 | 15 | // Load environment variables 16 | dotenv.config({ path: '../.env' }); 17 | 18 | let client; 19 | 20 | async function main() { 21 | // Define constants 22 | const load_data_from_azure_blob = true; 23 | const azure_blob_account = "https://cosmosdbcosmicworks.blob.core.windows.net"; 24 | const blob_container = "cosmic-works-mongo-vcore"; 25 | const data_folder = "../../data/cosmicworks/"; 26 | const batch_size = 1000; 27 | const process_customers_vector = false; 28 | const process_products_vector = true; 29 | const process_sales_orders_vector = false; 30 | 
31 | try { 32 | // Get Configuration Settings from environment variables 33 | let cosmosdb_connection_string = process.env.cosmosDbEndpoint; 34 | const cosmos_db_mongodb_database = process.env.cosmosdbDatabase; 35 | const cosmos_mongo_user = process.env.cosmosClusterAdmin; 36 | const cosmos_mongo_pwd = process.env.cosmosClusterPassword; 37 | const ai_endpoint = process.env.OpenAIEndpoint; 38 | const ai_key = process.env.OpenAIKey1; 39 | const embeddings_deployment = process.env.OpenAIDeploymentName; 40 | const completion_deployment = process.env.OpenAICompletionDeploymentName; 41 | 42 | // Initialize Azure OpenAI client 43 | const AzureOpenAIClient = new AzureOpenAI({endpoint: ai_endpoint, apiKey: ai_key, apiVersion: apiVersion}); 44 | 45 | // Replace placeholders in the connection string with actual values 46 | cosmosdb_connection_string = cosmosdb_connection_string.replace("<user>", encodeURIComponent(cosmos_mongo_user)); 47 | cosmosdb_connection_string = cosmosdb_connection_string.replace("<password>", encodeURIComponent(cosmos_mongo_pwd)); 48 | 49 | // Connect to MongoDB server 50 | const client = new MongoClient(cosmosdb_connection_string); 51 | await client.connect(); 52 | 53 | let userInput = ""; 54 | while (userInput !== "0") { 55 | console.clear(); 56 | console.log("Please select an option:"); 57 | console.log("\t1. Download data locally, load it into MongoDB and create vector index."); 58 | console.log("\t2. Load local data into MongoDB and create vector index."); 59 | console.log("\t3. Run a vector search"); 60 | console.log("\t4. Run a GPT search"); 61 | console.log("\t0. End"); 62 | userInput = await new Promise(resolve => rl.question("Option: ", resolve)); 63 | 64 | // Handle user input 65 | if (userInput === "0") { 66 | process.exit(0); 67 | } else if (!["1", "2", "3", "4"].includes(userInput)) { 68 | console.log("Invalid option. 
Please try again."); 69 | continue; 70 | } 71 | 72 | // Download data from Azure Blob if user selected option 1 73 | if (userInput === "1") { 74 | if (load_data_from_azure_blob) { 75 | await WebDownload.downloadFilesFromBlobIfTheyDontExist(azure_blob_account, blob_container, data_folder); 76 | } 77 | } 78 | 79 | // Load data into MongoDB and create vector index if user selected option 1 or 2 80 | if (userInput === "1" || userInput === "2") { 81 | await LoadAndVectorize.loadAndVectorizeLocalBlobDataToMongoDBCluster(client, data_folder, cosmos_db_mongodb_database, batch_size, embeddings_deployment, AzureOpenAIClient, process_customers_vector, process_products_vector, process_sales_orders_vector); 82 | } 83 | 84 | // Run a vector search if user selected option 3 85 | if (userInput === "3") { 86 | await Searches.runVectorSearch(embeddings_deployment, AzureOpenAIClient, client, cosmos_db_mongodb_database, rl); 87 | } 88 | 89 | // Run a GPT search if user selected option 4 90 | if (userInput === "4") { 91 | await Searches.runGPTSearch(embeddings_deployment, AzureOpenAIClient, completion_deployment, client, cosmos_db_mongodb_database, rl); 92 | } 93 | 94 | console.log("\nPress Enter to continue..."); 95 | await new Promise(resolve => rl.question("", resolve)); 96 | } 97 | } catch (ex) { 98 | // Log any errors 99 | console.error(ex); 100 | } finally { 101 | // Close readline interface and MongoDB client 102 | rl.close(); 103 | if (client) { 104 | await client.close(); 105 | } 106 | } 107 | } 108 | 109 | // Run the main function 110 | main(); 111 | -------------------------------------------------------------------------------- /04-vector-search/node.js/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cosmosdb-vector-search", 3 | "version": "1.0.0", 4 | "description": "Create a Node.js app that loads data into an Azure Cosmos DB MongoDB database and creates and uses a Vector.", 5 | "main": 
"load-and-vectorize-data.js", 6 | "scripts": { 7 | "start": "node load-and-vectorize-data.js" 8 | }, 9 | "dependencies": { 10 | "mongodb": "6.4.0", 11 | "@azure/storage-blob": "12.17.0", 12 | "readline": "1.3.0", 13 | "async-retry": "1.3.3", 14 | "dotenv": "16.4.5" 15 | } 16 | } -------------------------------------------------------------------------------- /04-vector-search/python/Blobs/loadAndVectorize.py: -------------------------------------------------------------------------------- 1 | # Import necessary modules 2 | import os 3 | import json 4 | import datetime 5 | from pymongo import UpdateOne 6 | import Collections.customers as Customers 7 | import Collections.products as Products 8 | import Collections.salesOrders as SalesOrders 9 | import SearchComponents.indexes as Indexes 10 | 11 | # Function to load and vectorize local blob data to MongoDB cluster 12 | def loadAndVectorizeLocalBlobDataToMongoDBCluster(client, data_folder,cosmos_db_mongodb_database,batch_size,embeddings_deployment,AzureOpenAIClient, process_customers_vector, process_products_vector, process_sales_orders_vector): 13 | # Get list of files in the data folder 14 | local_blobs_files = os.listdir(data_folder) 15 | 16 | # Iterate over each file in the folder 17 | for blob_file in local_blobs_files: 18 | batch_number = 1 19 | 20 | # Process only JSON files 21 | if blob_file.find(".json") >= 0: 22 | print("\n(" + str(datetime.datetime.now()) + ") " + blob_file) 23 | 24 | # Open the file and load its content 25 | with open(data_folder+blob_file, 'r') as file: 26 | file_content = file.read() 27 | json_data = json.loads(file_content) 28 | 29 | # Get the total number of documents in the file 30 | total_number_of_documents = len(json_data) 31 | 32 | if total_number_of_documents >= 0: 33 | # Get the collection name from the file name 34 | collection_name = blob_file[:blob_file.find(".json")] 35 | 36 | # Get the database and collection 37 | db = client[cosmos_db_mongodb_database] 38 | collection = 
db[collection_name] 39 | current_doc_idx = 0 40 | 41 | operations = [] 42 | 43 | index_list = [] 44 | 45 | # Iterate over each document in the JSON data 46 | for doc in json_data: 47 | current_doc_idx = current_doc_idx + 1 48 | 49 | # Generate embeddings for the document based on the collection type 50 | if collection_name == "customers" and process_customers_vector: 51 | doc = Customers.generateCustomerEmbedding(doc,embeddings_deployment,AzureOpenAIClient) 52 | 53 | elif collection_name == "products" and process_products_vector: 54 | doc = Products.generateProductEmbedding(doc,embeddings_deployment,AzureOpenAIClient) 55 | 56 | elif collection_name == "salesOrders" and process_sales_orders_vector: 57 | doc = SalesOrders.generateSalesOrderEmbedding(doc,embeddings_deployment,AzureOpenAIClient) 58 | 59 | # Print progress for every 100 documents processed 60 | if current_doc_idx % 100 == 0 and ((process_customers_vector and collection_name == "customers") or (process_products_vector and collection_name == "products") or (process_sales_orders_vector and collection_name == "salesOrders")): 61 | print(f"\t{current_doc_idx} out of {total_number_of_documents} docs vectorized.") 62 | 63 | # Prepare the update operation for the document 64 | operations.append(UpdateOne({"_id": doc["_id"]},{"$set": doc}, upsert=True)) 65 | 66 | # Write to the collection in batches 67 | if (len(operations) == batch_size): 68 | print(f"\tWriting collection {collection_name}, batch size {batch_size}, batch {batch_number}, number of documents processed so far {current_doc_idx}.") 69 | collection.bulk_write(operations,ordered=False) 70 | operations = [] 71 | batch_number = batch_number + 1 72 | 73 | # Print the total number of documents vectorized 74 | if (process_customers_vector and collection_name == "customers") or (process_products_vector and collection_name == "products") or (process_sales_orders_vector and collection_name == "salesOrders"): 75 | print(f"\t{total_number_of_documents} out of 
{total_number_of_documents} docs vectorized.") 76 | 77 | # Write any remaining operations to the collection 78 | if (len(operations) > 0): 79 | print(f"\tWriting collection {collection_name}, batch size {batch_size}, batch {batch_number}, number of documents processed so far {current_doc_idx}.") 80 | collection.bulk_write(operations,ordered=False) 81 | 82 | print(f"(" + str(datetime.datetime.now()) + ") " + f"Collection {collection_name}, total number of documents processed {current_doc_idx} .\n") 83 | 84 | pass # Replace this line with the lab's code 85 | -------------------------------------------------------------------------------- /04-vector-search/python/Blobs/webDownload.py: -------------------------------------------------------------------------------- 1 | # Import necessary modules 2 | from azure.storage.blob import BlobServiceClient 3 | import os.path 4 | 5 | # Function to download files from Azure Blob Storage if they don't exist locally 6 | def downloadFilesFromBlobIfTheyDontExist(account_url, container_name, data_folder): 7 | # Create a BlobServiceClient object which will be used to create a container client 8 | blob_service_client = BlobServiceClient(account_url=account_url) 9 | # Get a Container Client using the BlobServiceClient 10 | container_client = blob_service_client.get_container_client(container_name) 11 | 12 | # List all blobs in the container 13 | blob_list = container_client.list_blobs() 14 | for blob in blob_list: 15 | 16 | # Construct the file path where the blob will be saved locally 17 | file_path = data_folder + blob.name 18 | 19 | # Check if the file already exists locally. If not, download it. 
20 | if not os.path.isfile(file_path): 21 | # Get a Blob Client for the blob 22 | blob_file = blob_service_client.get_blob_client(container=container_name, blob=blob.name) 23 | # Open a local file in write mode 24 | with open(file=file_path, mode='wb') as local_file: 25 | # Download the blob to a stream 26 | file_stream = blob_file.download_blob() 27 | # Write the stream to the local file 28 | local_file.write(file_stream.readall()) -------------------------------------------------------------------------------- /04-vector-search/python/Collections/customers.py: -------------------------------------------------------------------------------- 1 | # Import the embeddings module 2 | import SearchComponents.embeddings as Embeddings 3 | 4 | # Function to get a string representation of customer addresses 5 | def getCustomerAddresses(addresses): 6 | addresses_string = "" 7 | 8 | # Iterate over each address in the addresses list 9 | for idx, address in enumerate(addresses): 10 | # Concatenate the address details to the addresses_string 11 | addresses_string= addresses_string + ("; " if idx > 0 else "") \ 12 | + ("Address Line - " + address["addressLine1"] if address["addressLine1"] else "") \ 13 | + (" " + address["addressLine2"] if address["addressLine2"] else "") \ 14 | + (", city - " + address["city"] if address["city"] else "") \ 15 | + (", state - " + address["state"] if address["state"] else "") \ 16 | + (", country - " + address["country"] if address["country"] else "") \ 17 | + (", zipcode - " + address["zipCode"] if address["zipCode"] else "") \ 18 | + (", location - " + address["location"] if address["location"] else "") 19 | 20 | # Return the concatenated string of addresses 21 | return addresses_string 22 | 23 | # Function to generate embeddings for customer data 24 | def generateCustomerEmbedding(customer,embeddings_deployment,AzureOpenAIClient): 25 | # Generate embeddings for customer type if it exists 26 | if customer["type"]: 27 | 
customer["customerTypeVector"] = Embeddings.generateEmbeddings (customer["type"],embeddings_deployment,AzureOpenAIClient) 28 | 29 | # Generate embeddings for customer title if it exists 30 | if customer["title"]: 31 | customer["customerTitleVector"] = Embeddings.generateEmbeddings (customer["title"],embeddings_deployment,AzureOpenAIClient) 32 | 33 | # Generate embeddings for customer name if it exists 34 | if customer["firstName"]+" "+customer["lastName"]: 35 | customer["customerNameVector"] = Embeddings.generateEmbeddings (customer["firstName"]+" "+customer["lastName"],embeddings_deployment,AzureOpenAIClient) 36 | 37 | # Generate embeddings for customer email address if it exists 38 | if customer["emailAddress"]: 39 | customer["customerEmailAddressVector"] = Embeddings.generateEmbeddings (customer["emailAddress"],embeddings_deployment,AzureOpenAIClient) 40 | 41 | # Generate embeddings for customer phone number if it exists 42 | if customer["phoneNumber"]: 43 | customer["customerPhoneNumberVector"] = Embeddings.generateEmbeddings (customer["phoneNumber"],embeddings_deployment,AzureOpenAIClient) 44 | 45 | # Get the string representation of customer addresses 46 | address = getCustomerAddresses(customer["addresses"]) 47 | # Generate embeddings for customer addresses if they exist 48 | if len(address) > 0: 49 | customer["customerAddressesVector"] = Embeddings.generateEmbeddings (address,embeddings_deployment,AzureOpenAIClient) 50 | 51 | # Return the customer data with generated embeddings 52 | return customer -------------------------------------------------------------------------------- /04-vector-search/python/Collections/products.py: -------------------------------------------------------------------------------- 1 | # Import the embeddings module 2 | import SearchComponents.embeddings as Embeddings 3 | 4 | # Function to generate embeddings for product data 5 | def generateProductEmbedding(product,embeddings_deployment,AzureOpenAIClient): 6 | 7 | pass # Replace 
this line with the lab's code 8 | -------------------------------------------------------------------------------- /04-vector-search/python/Collections/salesOrders.py: -------------------------------------------------------------------------------- 1 | # Import the embeddings module 2 | import SearchComponents.embeddings as Embeddings 3 | 4 | # Function to get a string representation of sales order details 5 | def getSalesOrderDetails(details): 6 | details_string = "" 7 | 8 | # Iterate over each detail in the details list 9 | for idx, detail in enumerate(details): 10 | # Concatenate the SKU and name of the detail to the details_string 11 | details_string= details_string + ("; " if idx > 0 else "") \ 12 | + (detail["sku"] if detail["sku"] else "") \ 13 | + ("," + detail["name"] if detail["name"] else "") 14 | 15 | # Return the concatenated string of details 16 | return details_string 17 | 18 | # Function to generate embeddings for sales order data 19 | def generateSalesOrderEmbedding(salesOrder,embeddings_deployment,AzureOpenAIClient): 20 | # Get the string representation of sales order details 21 | detail=getSalesOrderDetails(salesOrder["details"]) 22 | # If the detail string has length greater than 0 23 | if len(detail): 24 | # Generate embeddings for the detail string 25 | salesOrder["salesOrderDetailVector"] = Embeddings.generateEmbeddings (detail,embeddings_deployment,AzureOpenAIClient) 26 | 27 | # Return the sales order data with the generated embeddings 28 | return salesOrder -------------------------------------------------------------------------------- /04-vector-search/python/SearchComponents/completion.py: -------------------------------------------------------------------------------- 1 | # Import the json module 2 | import json 3 | 4 | # Function to generate completions for a given prompt 5 | def generateCompletion(prompt,completion_deployment,AzureOpenAICompletionClient,user_input): 6 | 7 | pass # Replace this line with the lab's code 8 | 
-------------------------------------------------------------------------------- /04-vector-search/python/SearchComponents/embeddings.py: -------------------------------------------------------------------------------- 1 | # Import the retry and wait_random_exponential functions from the tenacity module 2 | # These will be used to retry the generateEmbeddings function in case of failure 3 | from tenacity import retry, wait_random_exponential, stop_after_attempt 4 | 5 | # Import the time and json modules 6 | import time 7 | import json 8 | 9 | # Decorate the generateEmbeddings function with the retry decorator 10 | # This will retry the function if it fails, with a random exponential wait time between 1 and 20 seconds, and will stop after 10 attempts 11 | @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(10)) 12 | def generateEmbeddings(text,embeddings_deployment,AzureOpenAIClient): 13 | 14 | pass # Replace this line with the lab's code 15 | 16 | -------------------------------------------------------------------------------- /04-vector-search/python/SearchComponents/indexes.py: -------------------------------------------------------------------------------- 1 | # Function to create vector indexes in a MongoDB collection 2 | def createVectorIndexes(collection, index_list, db, collection_name): 3 | 4 | pass # Replace this line with the lab's code 5 | 6 | -------------------------------------------------------------------------------- /04-vector-search/python/SearchComponents/searches.py: -------------------------------------------------------------------------------- 1 | # Import the necessary modules and components 2 | import SearchComponents.vectorSearch as VectorSearch 3 | import SearchComponents.completion as Completion 4 | import os 5 | 6 | # Function to run a vector search 7 | def runVectorSearch(embeddings_deployment, AzureOpenAIClient, client, cosmos_db_mongodb_database): 8 | 9 | pass # Replace this line with the lab's code 10 | 11 | 
12 | # Function to run a GPT search 13 | def runGPTSearch(embeddings_deployment, AzureOpenAIClient, completion_deployment, AzureOpenAICompletionClient, client, cosmos_db_mongodb_database): 14 | 15 | pass # Replace this line with the lab's code 16 | -------------------------------------------------------------------------------- /04-vector-search/python/SearchComponents/vectorSearch.py: -------------------------------------------------------------------------------- 1 | # Import the embeddings module from the SearchComponents package 2 | import SearchComponents.embeddings as Embeddings 3 | 4 | # Define the vectorSearch function 5 | def vectorSearch(query, vector_column, collection, embeddings_deployment, AzureOpenAIClient ,num_results=3): 6 | 7 | pass # Replace this line with the lab's code 8 | -------------------------------------------------------------------------------- /04-vector-search/python/load-and-vectorize-data.py: -------------------------------------------------------------------------------- 1 | # Import necessary modules and functions 2 | import SearchComponents.searches as Searches 3 | import Blobs.webDownload as WebDownload 4 | import Blobs.loadAndVectorize as LoadAndVectorize 5 | 6 | import os 7 | import os.path 8 | import urllib 9 | 10 | from dotenv import load_dotenv 11 | 12 | import pymongo 13 | 14 | from openai import AzureOpenAI 15 | 16 | def main(): 17 | # Define variables 18 | load_data_from_azure_blob = True 19 | azure_blob_account = "https://cosmosdbcosmicworks.blob.core.windows.net" 20 | blob_container = "cosmic-works-mongo-vcore" 21 | data_folder = "../../data/cosmicworks/" 22 | batch_size = 1000 23 | process_customers_vector = False 24 | process_products_vector = True 25 | process_sales_orders_vector = False 26 | 27 | try: 28 | # Load environment variables from .env file 29 | load_dotenv("../.env") 30 | cosmosdb_connection_string = os.getenv('cosmosDbEndpoint') 31 | 32 | cosmos_db_mongodb_database = os.getenv('cosmosdbDatabase') 33 | 34 
| cosmos_mongo_user = os.getenv('cosmosClusterAdmin') 35 | cosmos_mongo_pwd = os.getenv('cosmosClusterPassword') 36 | 37 | ai_endpoint = os.getenv('OpenAIEndpoint') 38 | ai_key = os.getenv('OpenAIKey1') 39 | ai_version = os.getenv('OpenAIVersion') 40 | ai_deployment = os.getenv('OpenAIDeploymentName') 41 | ai_completion = os.getenv('OpenAICompletionDeploymentName') 42 | 43 | embeddings_deployment = os.getenv('OpenAIDeploymentModel') 44 | completion_deployment = os.getenv('OpenAICompletionDeploymentModel') 45 | 46 | # Create AzureOpenAI client for embeddings 47 | AzureOpenAIClient = AzureOpenAI( 48 | azure_endpoint = ai_endpoint 49 | , api_key = ai_key 50 | , api_version = ai_version 51 | , azure_deployment = ai_deployment 52 | ) 53 | 54 | # Create AzureOpenAI client for completion 55 | AzureOpenAICompletionClient = AzureOpenAI( 56 | azure_endpoint = ai_endpoint 57 | , api_key = ai_key 58 | , api_version = ai_version 59 | , azure_deployment = ai_completion 60 | ) 61 | 62 | # Replace placeholders in the connection string with actual values 63 | cosmosdb_connection_string = cosmosdb_connection_string.replace("<user>", urllib.parse.quote_plus(cosmos_mongo_user)) 64 | cosmosdb_connection_string = cosmosdb_connection_string.replace("<password>", urllib.parse.quote_plus(cosmos_mongo_pwd)) 65 | 66 | # Connect to MongoDB server 67 | client = pymongo.MongoClient(cosmosdb_connection_string) 68 | 69 | # User interaction loop 70 | user_input = "" 71 | while user_input.lower() != "0": 72 | os.system('cls' if os.name == 'nt' else 'clear') 73 | print("Please select an option:") 74 | print("\t1. Download data locally, load it into MongoDB and create vector index.") 75 | print("\t2. Load local data into MongoDB and create vector index.") 76 | print("\t3. Run a vector search") 77 | print("\t4. Run a GPT search") 78 | print("\t0. 
End") 79 | user_input = input("Option: ") 80 | 81 | # Handle user input 82 | if user_input == "0": 83 | break 84 | elif user_input not in ["1", "2", "3", "4"]: 85 | print("Invalid option. Please try again.") 86 | continue 87 | 88 | # Download data from Azure blob storage 89 | if user_input == "1": 90 | if load_data_from_azure_blob: 91 | WebDownload.downloadFilesFromBlobIfTheyDontExist(azure_blob_account, blob_container, data_folder) 92 | 93 | # Load data into MongoDB and create vector index 94 | if user_input == "1" or user_input == "2": 95 | LoadAndVectorize.loadAndVectorizeLocalBlobDataToMongoDBCluster(client, data_folder,cosmos_db_mongodb_database,batch_size,embeddings_deployment, AzureOpenAIClient, process_customers_vector, process_products_vector, process_sales_orders_vector) 96 | 97 | # Run a vector search 98 | if user_input == "3": 99 | Searches.runVectorSearch(embeddings_deployment, AzureOpenAIClient,client, cosmos_db_mongodb_database) 100 | 101 | # Run a GPT search 102 | if user_input == "4": 103 | Searches.runGPTSearch(embeddings_deployment, AzureOpenAIClient, completion_deployment, AzureOpenAICompletionClient, client, cosmos_db_mongodb_database) 104 | 105 | print("\nPress Enter to continue...") 106 | input() 107 | 108 | except Exception as ex: 109 | print(ex) 110 | 111 | # Run the main function if the script is run as a standalone program 112 | if __name__ == "__main__": 113 | main() -------------------------------------------------------------------------------- /05-deploy-with-aks/create-mongodb-vcore-cluster.bicep: -------------------------------------------------------------------------------- 1 | 2 | @description('Azure Cosmos DB MongoDB vCore cluster name') 3 | @maxLength(40) 4 | param clusterName string = '' //= 'msdocs-${uniqueString(resourceGroup().id)}' 5 | 6 | @description('Location for the cluster.') 7 | param location string = '' //= resourceGroup().location 8 | 9 | @description('Username for admin user') 10 | param adminUsername string = '' 
11 | 12 | @description('Public IP address to allow access to the cluster') 13 | param publicIp string = '0.0.0.0' 14 | 15 | @description('Public IP address rule name for local access to the cluster') 16 | param publicIpRuleName string = 'labMachineIPAccessRule' 17 | 18 | @secure() 19 | @description('Password for admin user') 20 | //@minLength(8) 21 | @maxLength(128) 22 | param adminPassword string = '' 23 | 24 | resource cluster 'Microsoft.DocumentDB/mongoClusters@2023-03-01-preview' = { 25 | name: clusterName 26 | location: location 27 | properties: { 28 | administratorLogin: adminUsername 29 | administratorLoginPassword: adminPassword 30 | nodeGroupSpecs: [ 31 | { 32 | kind: 'Shard' 33 | nodeCount: 1 34 | sku: 'M30' 35 | diskSizeGB: 128 36 | enableHa: false 37 | } 38 | ] 39 | } 40 | } 41 | 42 | 43 | 44 | resource firewallRules 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = { 45 | parent: cluster 46 | name: 'AllowAllAzureServices' 47 | properties: { 48 | startIpAddress: '0.0.0.0' 49 | endIpAddress: '0.0.0.0' 50 | } 51 | } 52 | 53 | resource firewallRules_local_access 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = { 54 | parent: cluster 55 | name: publicIpRuleName 56 | properties: { 57 | startIpAddress: publicIp 58 | endIpAddress: publicIp 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/.env: -------------------------------------------------------------------------------- 1 | randomIdentifier= 2 | location= 3 | changeSubscription= 4 | subscriptionName= 5 | skipCreatingResourceGroup= 6 | resourceGroup= 7 | 8 | skipCreatingCosmosDBCluster= 9 | skipCreatingCosmosDBPublicIPFirewallRule= 10 | cosmosCluster= 11 | cosmosClusterLocation= 12 | cosmosDbEndpoint= 13 | cosmosClusterAdmin= 14 | cosmosClusterPassword= 15 | cosmosdbDatabase="cosmicworks" 16 | 17 | skipCreatingAzureOpenAIAccount= 18 | cognitiveServicesKind="OpenAI" 19 | OpenAIAccount= 
20 | OpenAIAccountLocation= 21 | OpenAIAccountSKU="s0" 22 | OpenAIEndpoint= 23 | OpenAIKey1= 24 | OpenAIVersion="2023-05-15" 25 | 26 | skipCreatingAzureOpenAIDeployment= 27 | OpenAIDeploymentName= 28 | OpenAIDeploymentModel="text-embedding-ada-002" 29 | OpenAIDeploymentModelFormat="OpenAI" 30 | OpenAIDeploymentModelVersion="2" 31 | OpenAIDeploymentSKU="Standard" 32 | OpenAIDeploymentSKUCapacity=100 33 | 34 | skipCreatingAzureOpenAICompletionDeployment= 35 | OpenAICompletionDeploymentName= 36 | OpenAICompletionDeploymentModel="gpt-35-turbo" 37 | OpenAICompletionDeploymentModelFormat="OpenAI" 38 | OpenAICompletionDeploymentModelVersion="0301" 39 | OpenAICompletionDeploymentSKU="Standard" 40 | OpenAICompletionDeploymentSKUCapacity=100 -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/Blobs/loadAndVectorize.js: -------------------------------------------------------------------------------- 1 | // Importing required modules 2 | const fs = require('fs'); 3 | const path = require('path'); 4 | const mongodb = require('mongodb'); 5 | const { MongoClient, updateOne } = require('mongodb'); 6 | 7 | // Importing custom modules 8 | const Customers = require('../Collections/customers'); 9 | const Products = require('../Collections/products'); 10 | const SalesOrders = require('../Collections/salesOrders'); 11 | const Indexes = require('../SearchComponents/indexes'); 12 | 13 | // Function to load and vectorize local blob data to MongoDB cluster 14 | async function loadAndVectorizeLocalBlobDataToMongoDBCluster(client, dataFolder, cosmosDbMongoDbDatabase, batchSize, embeddingsDeployment, AzureOpenAIClient, processCustomersVector, processProductsVector, processSalesOrdersVector) { 15 | // Read JSON documents 
from the data folder 16 | const localBlobsFiles = fs.readdirSync(dataFolder); 17 | 18 | // Loop through each file in the data folder 19 | for (const blobFile of localBlobsFiles) { 20 | let batchNumber = 1; 21 | 22 | // Process only JSON files 23 | if (blobFile.includes('.json')) { 24 | console.log(`\n(${new Date().toISOString()}) ${blobFile}`); 25 | 26 | // Read the content of the file and parse it as JSON 27 | const fileContent = fs.readFileSync(path.join(dataFolder, blobFile), 'utf-8'); 28 | const jsonData = JSON.parse(fileContent); 29 | 30 | const totalNumberOfDocuments = jsonData.length; 31 | 32 | // Process only if there are documents in the JSON file 33 | if (totalNumberOfDocuments >= 0) { 34 | // Get the collection name from the file name 35 | const collectionName = blobFile.split(".json")[0]; 36 | 37 | // Get the database and the collection 38 | const db = client.db(cosmosDbMongoDbDatabase); 39 | const collection = db.collection(collectionName); 40 | let currentDocIdx = 0; 41 | 42 | let operations = []; 43 | 44 | let indexList = []; 45 | 46 | // Loop through each document in the JSON file 47 | for (let doc of jsonData) { 48 | currentDocIdx++; 49 | 50 | // Generate embeddings for the document based on the collection name 51 | if (collectionName === "customers" && processCustomersVector) { 52 | doc = await Customers.generateCustomerEmbedding(doc, embeddingsDeployment, AzureOpenAIClient); 53 | } else if (collectionName === "products" && processProductsVector) { 54 | doc = await Products.generateProductEmbedding(doc, embeddingsDeployment, AzureOpenAIClient); 55 | } else if (collectionName === "salesOrders" && processSalesOrdersVector) { 56 | doc = await SalesOrders.generateSalesOrderEmbedding(doc, embeddingsDeployment, AzureOpenAIClient); 57 | } 58 | 59 | // Log the progress for every 100 documents processed 60 | if (currentDocIdx % 100 === 0 && ((processCustomersVector && collectionName === "customers") || (processProductsVector && collectionName === 
"products") || (processSalesOrdersVector && collectionName === "salesOrders"))) { 61 | console.log(`\t${currentDocIdx} out of ${totalNumberOfDocuments} docs vectorized.`); 62 | } 63 | 64 | // Prepare the update operation for the document 65 | operations.push({ 66 | updateOne: { 67 | filter: { "_id": doc["_id"] }, 68 | update: { "$set": doc }, 69 | upsert: true 70 | } 71 | }); 72 | 73 | // Write the operations to the database in batches 74 | if (operations.length === batchSize) { 75 | console.log(`\tWriting collection ${collectionName}, batch size ${batchSize}, batch ${batchNumber}, number of documents processed so far ${currentDocIdx}.`); 76 | await collection.bulkWrite(operations, { ordered: false }); 77 | operations = []; 78 | batchNumber++; 79 | } 80 | } 81 | 82 | // Log the completion of vectorization 83 | if ((processCustomersVector && collectionName === "customers") || (processProductsVector && collectionName === "products") || (processSalesOrdersVector && collectionName === "salesOrders")) { 84 | console.log(`\t${totalNumberOfDocuments} out of ${totalNumberOfDocuments} docs vectorized.`); 85 | } 86 | 87 | // Write any remaining operations to the database 88 | if (operations.length > 0) { 89 | console.log(`\tWriting collection ${collectionName}, batch size ${batchSize}, batch ${batchNumber}, number of documents processed so far ${currentDocIdx}.`); 90 | await collection.bulkWrite(operations, { ordered: false }); 91 | } 92 | 93 | console.log(`(${new Date().toISOString()}) Collection ${collectionName}, total number of documents processed ${currentDocIdx} .\n`); 94 | 95 | // Create the vector indexes for the collection 96 | if (processCustomersVector && collectionName === "customers") { 97 | indexList = [ 98 | ["customerTypeVectorSearchIndex", "customerTypeVector"], 99 | ["customerTitleVectorSearchIndex", "customerTitleVector"], 100 | ["customerNameVectorSearchIndex", "customerNameVector"], 101 | ["customerEmailAddressVectorSearchIndex", 
"customerEmailAddressVector"], 102 | ["customerPhoneNumberVectorSearchIndex", "customerPhoneNumberVector"], 103 | ["customerAddressesVectorSearchIndex", "customerAddressesVector"] 104 | ]; 105 | await Indexes.createVectorIndexes(collection, indexList, db, collectionName); 106 | } else if (processProductsVector && collectionName === "products") { 107 | indexList = [ 108 | ["productVectorSearchIndex", "productVector"] 109 | ]; 110 | await Indexes.createVectorIndexes(collection, indexList, db, collectionName); 111 | } else if (processSalesOrdersVector && collectionName === "salesOrders") { 112 | indexList = [ 113 | ["salesOrderDetailVectorSearchIndex", "salesOrderDetailVector"] 114 | ]; 115 | await Indexes.createVectorIndexes(collection, indexList, db, collectionName); 116 | } 117 | 118 | } 119 | } 120 | } 121 | } 122 | 123 | // Export the function 124 | module.exports.loadAndVectorizeLocalBlobDataToMongoDBCluster = loadAndVectorizeLocalBlobDataToMongoDBCluster; -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/Blobs/webDownload.js: -------------------------------------------------------------------------------- 1 | // Import the BlobServiceClient from the Azure Storage Blob package 2 | const { BlobServiceClient } = require('@azure/storage-blob'); 3 | 4 | // Import the file system module 5 | const fs = require('fs'); 6 | 7 | // Import the path module 8 | const path = require('path'); 9 | 10 | // Define an asynchronous function to download files from Azure Blob Storage if they don't exist locally 11 | async function downloadFilesFromBlobIfTheyDontExist(accountUrl, containerName, dataFolder) { 12 | // Create a new BlobServiceClient 13 | const blobServiceClient = new BlobServiceClient(accountUrl); 14 | 15 | // Get a ContainerClient for the specified container 16 | const containerClient = blobServiceClient.getContainerClient(containerName); 17 | 18 | // List all blobs in the container 19 | let blobs = 
containerClient.listBlobsFlat(); 20 | 21 | // Iterate over each blob 22 | for await (const blob of blobs) { 23 | // Construct the local file path 24 | const filePath = path.join(dataFolder, blob.name); 25 | 26 | // Check if the file already exists locally 27 | if (!fs.existsSync(filePath)) { 28 | // If the file doesn't exist locally, download it from Azure Blob Storage 29 | 30 | // Get a BlobClient for the blob 31 | const blobClient = containerClient.getBlobClient(blob.name); 32 | 33 | // Download the blob 34 | const downloadBlockBlobResponse = await blobClient.download(0); 35 | 36 | // Create a write stream for the local file 37 | const fileStream = fs.createWriteStream(filePath); 38 | 39 | // Pipe the downloaded blob to the file stream 40 | downloadBlockBlobResponse.readableStreamBody.pipe(fileStream); 41 | } 42 | } 43 | } 44 | 45 | // Export the function 46 | module.exports.downloadFilesFromBlobIfTheyDontExist = downloadFilesFromBlobIfTheyDontExist; -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/Collections/customers.js: -------------------------------------------------------------------------------- 1 | // Import the Embeddings module from the SearchComponents directory 2 | const Embeddings = require('../SearchComponents/embeddings'); 3 | 4 | // Function to convert a list of address objects into a single string 5 | function getCustomerAddresses(addresses) { 6 | let addressesString = ""; 7 | 8 | // Iterate over each address in the list 9 | for (let idx = 0; idx < addresses.length; idx++) { 10 | const address = addresses[idx]; 11 | // Concatenate each address field into the addressesString 12 | addressesString += (idx > 0 ? "; " : "") + 13 | (address.addressLine1 ? "Address Line - " + address.addressLine1 : "") + 14 | (address.addressLine2 ? " " + address.addressLine2 : "") + 15 | (address.city ? ", city - " + address.city : "") + 16 | (address.state ? 
", state - " + address.state : "") + 17 | (address.country ? ", country - " + address.country : "") + 18 | (address.zipCode ? ", zipcode - " + address.zipCode : "") + 19 | (address.location ? ", location - " + address.location : ""); 20 | } 21 | 22 | // Return the concatenated string of addresses 23 | return addressesString; 24 | } 25 | 26 | // Asynchronous function to generate embeddings for various customer fields 27 | async function generateCustomerEmbedding(customer, embeddingsDeployment, AzureOpenAIClient) { 28 | // If the customer has a type, generate an embedding for it 29 | if (customer.type) { 30 | customer.customerTypeVector = await Embeddings.generateEmbeddings(customer.type, embeddingsDeployment, AzureOpenAIClient); 31 | } 32 | 33 | // If the customer has a title, generate an embedding for it 34 | if (customer.title) { 35 | customer.customerTitleVector = await Embeddings.generateEmbeddings(customer.title, embeddingsDeployment, AzureOpenAIClient); 36 | } 37 | 38 | // If the customer has a first and last name, generate an embedding for it 39 | if (customer.firstName && customer.lastName) { 40 | customer.customerNameVector = await Embeddings.generateEmbeddings(customer.firstName + " " + customer.lastName, embeddingsDeployment, AzureOpenAIClient); 41 | } 42 | 43 | // If the customer has an email address, generate an embedding for it 44 | if (customer.emailAddress) { 45 | customer.customerEmailAddressVector = await Embeddings.generateEmbeddings(customer.emailAddress, embeddingsDeployment, AzureOpenAIClient); 46 | } 47 | 48 | // If the customer has a phone number, generate an embedding for it 49 | if (customer.phoneNumber) { 50 | customer.customerPhoneNumberVector = await Embeddings.generateEmbeddings(customer.phoneNumber, embeddingsDeployment, AzureOpenAIClient); 51 | } 52 | 53 | // Get the string representation of the customer's addresses 54 | const address = getCustomerAddresses(customer.addresses); 55 | // If the customer has addresses, generate an 
embedding for them 56 | if (address.length > 0) { 57 | customer.customerAddressesVector = await Embeddings.generateEmbeddings(address, embeddingsDeployment, AzureOpenAIClient); 58 | } 59 | 60 | // Return the customer object with the added embeddings 61 | return customer; 62 | } 63 | 64 | // Export the generateCustomerEmbedding function 65 | module.exports.generateCustomerEmbedding = generateCustomerEmbedding; -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/Collections/products.js: -------------------------------------------------------------------------------- 1 | // Import the Embeddings module from the SearchComponents directory 2 | const Embeddings = require('../SearchComponents/embeddings'); 3 | 4 | // Asynchronous function to generate an embedding for a product 5 | async function generateProductEmbedding(product, embeddingsDeployment, AzureOpenAIClient) { 6 | 7 | // Construct a string representing the product's name and category 8 | const productName = "Category - " + product["categoryName"] + ", Name -" + product["name"]; 9 | 10 | // If the productName exists, generate an embedding for it 11 | if (productName) { 12 | // The embedding is generated using the Embeddings module's generateEmbeddings function 13 | // The resulting embedding is stored in the product object under the key "productVector" 14 | product["productVector"] = await Embeddings.generateEmbeddings(productName, embeddingsDeployment, AzureOpenAIClient); 15 | } 16 | 17 | // Return the product object with the added embedding 18 | return product; 19 | 20 | } 21 | 22 | // Export the generateProductEmbedding function 23 | module.exports.generateProductEmbedding = generateProductEmbedding; -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/Collections/salesOrders.js: -------------------------------------------------------------------------------- 1 | // Import the Embeddings module 
from the SearchComponents directory 2 | const Embeddings = require('../SearchComponents/embeddings'); 3 | 4 | // Function to convert a list of sales order details into a single string 5 | function getSalesOrderDetails(details) { 6 | let detailsString = ""; 7 | 8 | // Iterate over each detail in the list 9 | for (let idx = 0; idx < details.length; idx++) { 10 | const detail = details[idx]; 11 | // Concatenate each detail's SKU and name into the detailsString 12 | detailsString += (idx > 0 ? "; " : "") + 13 | (detail.sku ? detail.sku : "") + 14 | (detail.name ? "," + detail.name : ""); 15 | } 16 | 17 | // Return the concatenated string of details 18 | return detailsString; 19 | } 20 | 21 | // Asynchronous function to generate an embedding for a sales order 22 | async function generateSalesOrderEmbedding(salesOrder, embeddingsDeployment, AzureOpenAIClient) { 23 | // Get the string representation of the sales order's details 24 | const detail = getSalesOrderDetails(salesOrder.details); 25 | // If the detail string has content, generate an embedding for it 26 | if (detail.length > 0) { 27 | // The embedding is generated using the Embeddings module's generateEmbeddings function 28 | // The resulting embedding is stored in the salesOrder object under the key "salesOrderDetailVector" 29 | salesOrder.salesOrderDetailVector = await Embeddings.generateEmbeddings(detail, embeddingsDeployment, AzureOpenAIClient); 30 | } 31 | 32 | // Return the salesOrder object with the added embedding 33 | return salesOrder; 34 | } 35 | 36 | // Export the generateSalesOrderEmbedding function 37 | module.exports.generateSalesOrderEmbedding = generateSalesOrderEmbedding; -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/SearchComponents/completion.js: -------------------------------------------------------------------------------- 1 | // Asynchronous function to generate a completion for a given prompt 2 | const generateCompletion = 
async (prompt, completionDeployment, AzureOpenAICompletionClient, userInput) => { 3 | 4 | // Define the system prompt that sets the context for the AI 5 | const systemPrompt = ` 6 | You are an intelligent assistant for the Adventure Works Bike Shop. 7 | You are designed to provide helpful answers to user questions about the store inventory given the information about to be provided. 8 | - Only answer questions related to the information provided below, provide 3 clear suggestions in a list format. 9 | - Write two lines of whitespace between each answer in the list. 10 | - Only provide answers that have products that are part of the Adventure Works Bike Shop. 11 | - If you're unsure of an answer, you can say "I don't know" or "I'm not sure" and recommend users search themselves. 12 | `; 13 | 14 | // Initialize the messages array with the system prompt and user input 15 | let messages = [ 16 | {role: "system", content: systemPrompt}, 17 | {role: "user", content: userInput}, 18 | ]; 19 | 20 | // Add each item from the prompt to the messages array 21 | for (let item of prompt) { 22 | messages.push({role: "system", content: `${item.document.categoryName} ${item.document.name}`}); 23 | } 24 | 25 | // Call the Azure OpenAI Completion Client's getChatCompletions function with the completion deployment and messages 26 | // Await the response and store it in the response variable 27 | const response = await AzureOpenAICompletionClient.chat.completions.create({ messages: messages, model: completionDeployment }); 28 | 29 | // Return the response 30 | return response; 31 | 32 | } 33 | 34 | // Export the generateCompletion function 35 | module.exports.generateCompletion = generateCompletion; -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/SearchComponents/embeddings.js: -------------------------------------------------------------------------------- 1 | // Import the async-retry module 2 | const retry = 
require('async-retry'); 3 | 4 | // Asynchronous function to generate embeddings from a string of text 5 | async function generateEmbeddings(text, embeddingsDeployment, AzureOpenAIClient) { 6 | 7 | //Generate embeddings from string of text. 8 | //This will be used to vectorize data and user input for interactions with Azure OpenAI. 9 | // Use the async-retry module to attempt the following code block 10 | // If an error occurs, it will retry up to 10 times, with an increasing timeout between each attempt 11 | return await retry(async bail => { 12 | try { 13 | // Call the Azure OpenAI Client's getEmbeddings function with the embeddings deployment and text 14 | // Await the response and store it in the response variable 15 | const response = await AzureOpenAIClient.embeddings.create({ input: text, model: embeddingsDeployment }); 16 | 17 | // Extract the embeddings from the response data 18 | const embeddings = response.data[0].embedding; 19 | // Wait for 10 milliseconds to avoid rate limiting (change to 500 on AOAI for free tier) 20 | await new Promise(resolve => setTimeout(resolve, 10)); 21 | // Return the embeddings 22 | return embeddings; 23 | } catch (err) { 24 | // If a 429 error (Too Many Requests) is received, rethrow the error to trigger a retry 25 | if (err.response && err.response.status === 429) { 26 | throw err; 27 | } else { 28 | // For any other error, stop retrying and throw the error 29 | bail(err); 30 | } 31 | } 32 | }, { 33 | retries: 10, // Maximum number of retries 34 | minTimeout: 1000, // Minimum timeout between retries (1 second) 35 | factor: 2, // Factor by which the timeout increases each time 36 | maxTimeout: 20000 // Maximum timeout between retries (20 seconds) 37 | }); 38 | 39 | } 40 | 41 | // Export the generateEmbeddings function 42 | module.exports.generateEmbeddings = generateEmbeddings; -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/SearchComponents/indexes.js: 
-------------------------------------------------------------------------------- 1 | // Asynchronous function to create vector indexes in a MongoDB collection 2 | async function createVectorIndexes(collection, indexList, db, collectionName) { 3 | 4 | // Get the current indexes in the collection 5 | const collectionIndexes = await collection.indexInformation(); 6 | 7 | // Iterate over each index in the indexList 8 | for (let [indexName, vectorColumn] of indexList) { 9 | // Iterate over each index in the collection 10 | for (let index of Object.keys(collectionIndexes)) { 11 | // If the index already exists in the collection, drop it 12 | if (index === indexName) { 13 | await collection.dropIndex(indexName); 14 | break; 15 | } 16 | } 17 | 18 | // Create a new IVF index in the collection 19 | // The index is created using the MongoDB command function 20 | // The command specifies the collection to create the index in, the name of the index, 21 | // the key to index on, and the options for the CosmosDB search 22 | const commandResult = await db.command({ 23 | 'createIndexes': collectionName, 24 | 'indexes': [ 25 | { 26 | 'name': indexName, 27 | 'key': { 28 | [vectorColumn]: "cosmosSearch" 29 | }, 30 | 'cosmosSearchOptions': { 31 | 'kind': 'vector-ivf', 32 | 'numLists': 1, 33 | 'similarity': 'COS', 34 | 'dimensions': 1536 35 | } 36 | } 37 | ] 38 | }); 39 | } 40 | 41 | } 42 | 43 | // Export the createVectorIndexes function 44 | module.exports.createVectorIndexes = createVectorIndexes; -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/SearchComponents/searches.js: -------------------------------------------------------------------------------- 1 | // Import the required modules 2 | const VectorSearch = require('../SearchComponents/vectorSearch'); 3 | const Completion = require('../SearchComponents/completion'); 4 | 5 | function vectorSearchPrompt() { 6 | return "What would you like to know about our bike shop's 
// Import the required modules
const VectorSearch = require('../SearchComponents/vectorSearch');
const Completion = require('../SearchComponents/completion');

// Prompt shown to the user before a vector search.
function vectorSearchPrompt() {
    return "What would you like to know about our bike shop's inventory?";
}

// Prompt shown to the user before a GPT search session.
function GPTSearchPrompt() {
    return "What would you like to ask about our bike shop's inventory? Type 'end' to end the session.";
}

// Asynchronous function to run a vector search and format the hits.
// Returns an array of one-line summary strings, one per matching product.
async function runVectorSearch(embeddingsDeployment, AzureOpenAIClient, client, cosmosDbMongodbDatabase, userInput) {

    // Search configuration: result cap, embedding column, and target collection
    const maxResults = 20;
    const vectorColumn = "productVector";
    const collectionName = "products";

    // Resolve the products collection on the configured database
    const collection = client.db(cosmosDbMongodbDatabase).collection(collectionName);

    // Run the vector search against the collection
    const results = await VectorSearch.vectorSearch(userInput, vectorColumn, collection, embeddingsDeployment, AzureOpenAIClient, maxResults);

    // Format each hit as "score, category, product name"
    return results.map(result =>
        `Similarity Score: ${result.similarityScore}, category: ${result.document.categoryName}, Product: ${result.document.name}`);
}

// Asynchronous function to run a GPT search: a vector search whose results
// ground a chat completion answering the user's question.
async function runGPTSearch(embeddingsDeployment, AzureOpenAIClient, completionDeployment, client, cosmosDbMongodbDatabase, userInput) {

    // Search configuration: result cap, embedding column, and target collection
    const maxResults = 20;
    const vectorColumn = "productVector";
    const collectionName = "products";

    // Resolve the products collection on the configured database
    const collection = client.db(cosmosDbMongodbDatabase).collection(collectionName);

    // Retrieve the most relevant products for the user's question
    const resultsForPrompt = await VectorSearch.vectorSearch(userInput, vectorColumn, collection, embeddingsDeployment, AzureOpenAIClient, maxResults);

    // Generate a completion grounded in the vector search results and
    // return the assistant's message text
    const completionsResults = await Completion.generateCompletion(resultsForPrompt, completionDeployment, AzureOpenAIClient, userInput);
    return completionsResults.choices[0].message.content;
}

// Export functions
module.exports.runVectorSearch = runVectorSearch;
module.exports.runGPTSearch = runGPTSearch;
module.exports.vectorSearchPrompt = vectorSearchPrompt;
module.exports.GPTSearchPrompt = GPTSearchPrompt;
// Import the Embeddings module
const Embeddings = require('../SearchComponents/embeddings');

// Asynchronous function to perform a vector search against a Cosmos DB for
// MongoDB vCore collection.
//
// Parameters:
//   query                - the user's free-text query
//   vectorColumn         - name of the field holding the stored embeddings
//   collection           - the MongoDB collection to search
//   embeddingsDeployment - Azure OpenAI embeddings deployment name
//   AzureOpenAIClient    - Azure OpenAI client used to embed the query
//   numResults           - top-k results to return (default 3)
// Returns: array of { similarityScore, document } results.
async function vectorSearch(query, vectorColumn, collection, embeddingsDeployment, AzureOpenAIClient, numResults = 3) {

    // Vectorize the query text so it can be compared to stored embeddings
    const queryEmbedding = await Embeddings.generateEmbeddings(query, embeddingsDeployment, AzureOpenAIClient);

    // Stage 1: cosmosSearch over the embedding column, returning the top k
    // stored documents alongside the search metadata.
    const searchStage = {
        '$search': {
            "cosmosSearch": {
                "vector": queryEmbedding,
                "path": vectorColumn,
                "k": numResults
            },
            "returnStoredSource": true
        }
    };

    // Stage 2: surface the similarity score next to the original document
    const projectStage = {
        '$project': { 'similarityScore': { '$meta': 'searchScore' }, 'document': '$$ROOT' }
    };

    // Execute the pipeline and materialize the cursor into an array
    return await collection.aggregate([searchStage, projectStage]).toArray();

}

// Export the vectorSearch function
module.exports.vectorSearch = vectorSearch;
const express = require('express');
const path = require('path');
const app = express();
const data = require('./load-and-vectorize-data');
const port = process.env.PORT || 3000;

// In-memory status map for background tasks: taskId -> 'pending' | 'completed' | 'failed'.
// NOTE(review): entries are never evicted, so this grows for the lifetime of the
// process — acceptable for a lab app, worth a TTL in production.
let taskStatuses = {};

app.use(express.static(path.join(__dirname, 'public')));

app.listen(port, () => {
    console.log(`Server is running on port ${port}`);
});

app.get('/', (req, res) => {
    res.sendFile(path.join(__dirname, 'public', 'index.html'));
});

// TODO: Extend the application code

// Kick off a menu option. Options 1/2 (data load + vector index creation) are
// long-running, so they execute in the background and the client polls
// /checkStatus with the returned taskId. Other options return synchronously.
app.get('/submitOption', async (req, res) => {
    try {
        const query = req.query.o;

        if (query === '1' || query === '2') {

            console.log("Starting long-running operation for query:", query);
            const taskId = Date.now();
            taskStatuses[taskId] = 'pending';

            // Deliberately not awaited: respond immediately and let the task run
            // in the background. The .catch is essential — without it a failure
            // is an unhandled rejection and the task stays 'pending' forever.
            data.processOption(query).then(() => {
                taskStatuses[taskId] = 'completed';
                console.log("Task completed:", taskId);
            }).catch((err) => {
                taskStatuses[taskId] = 'failed';
                console.error("Task failed:", taskId, err);
            });

            res.json({ result: "Please wait while the vector index is created. This may take a while.",
                taskId: taskId, status: 'pending' });

        } else {

            const result = await data.processOption(query);
            res.json({result : result});

        }
    } catch (error) {
        console.error("Error processing option:", error);
        res.status(500).send("An error occurred while processing the request.");
    }
});

// Run a search query: option 3 is a vector search, option 4 is a GPT search.
app.get('/submitQuery', async (req, res) => {
    try {

        const option = req.query.o;
        const query = req.query.q;

        if (option == '3') {

            const result = await data.doVectorSearch(query);

            // convert the array to a string for the client
            const resultString = result.join('\n');
            res.send({result: resultString});

        } else if (option == '4') {

            const result = await data.doGPTSearch(query);
            res.json({result : result});

        }
    } catch (error) {
        console.error("Error processing option:", error);
        res.status(500).send("An error occurred while processing the request.");
    }
});

// Poll endpoint for the background tasks started by /submitOption.
app.get('/checkStatus', (req, res) => {
    const taskId = req.query.taskId;

    if (!taskId || !taskStatuses[taskId]) {
        return res.status(404).json({ error: "Task ID not found" });
    }

    const status = taskStatuses[taskId];
    res.json({ taskId, status });
});
// Import necessary modules
const Searches = require('./SearchComponents/searches');
const WebDownload = require('./Blobs/webDownload');
const LoadAndVectorize = require('./Blobs/loadAndVectorize');
const readline = require('readline');
const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
});
const dotenv = require('dotenv');
const MongoClient = require('mongodb').MongoClient;
const { AzureOpenAI } = require("openai");
const apiVersion = "2024-07-01-preview";

// Load environment variables
dotenv.config({ path: '.env' });

// Define constants
const load_data_from_azure_blob = true;
const azure_blob_account = "https://cosmosdbcosmicworks.blob.core.windows.net";
const blob_container = "cosmic-works-mongo-vcore";
const data_folder = "../../data/cosmicworks/";
const batch_size = 1000;
const process_customers_vector = false;
const process_products_vector = true;
const process_sales_orders_vector = false;

// Get Configuration Settings from environment variables
let cosmosdb_connection_string = process.env.cosmosDbEndpoint;
const cosmos_db_mongodb_database = process.env.cosmosdbDatabase;
const cosmos_mongo_user = process.env.cosmosClusterAdmin;
const cosmos_mongo_pwd = process.env.cosmosClusterPassword;
const ai_endpoint = process.env.OpenAIEndpoint;
const ai_key = process.env.OpenAIKey1;
const embeddings_deployment = process.env.OpenAIDeploymentName;
const completion_deployment = process.env.OpenAICompletionDeploymentName;

// Initialize Azure OpenAI client
const AzureOpenAIClient = new AzureOpenAI({endpoint: ai_endpoint, apiKey: ai_key, apiVersion: apiVersion});

// Substitute the credential placeholders in the connection string template.
// NOTE(review): the original called .replace("", ...), which only prepends the
// credential to the string — the "<user>"/"<password>" tokens appear to have
// been stripped from the source; confirm against the .env's connection string.
cosmosdb_connection_string = cosmosdb_connection_string.replace("<user>", encodeURIComponent(cosmos_mongo_user));
cosmosdb_connection_string = cosmosdb_connection_string.replace("<password>", encodeURIComponent(cosmos_mongo_pwd));

// Returns the menu options shown to the user by the web front end.
function getOptions() {
    return ["Download data locally, load it into MongoDB and create vector index.",
        "Load local data into MongoDB and create vector index.",
        "Run a vector search",
        "Run a GPT search"];
}

// Dispatches a menu option:
//   "1" - download the data files, then load + vectorize into MongoDB
//   "2" - load + vectorize local data into MongoDB
//   "3"/"4" - return the prompt string for the corresponding search mode
// Returns a status/prompt string, or undefined on an unrecognized option.
async function processOption(userInput) {

    if (userInput === "1") {
        if (load_data_from_azure_blob) {
            await WebDownload.downloadFilesFromBlobIfTheyDontExist(azure_blob_account, blob_container, data_folder);
        }
    }

    // Load data into MongoDB and create vector index if user selected option 1 or 2
    if (userInput === "1" || userInput === "2") {

        // Connect to MongoDB server
        const client = new MongoClient(cosmosdb_connection_string);
        await client.connect();

        try {
            await LoadAndVectorize.loadAndVectorizeLocalBlobDataToMongoDBCluster(
                client,
                data_folder,
                cosmos_db_mongodb_database,
                batch_size,
                embeddings_deployment,
                AzureOpenAIClient,
                process_customers_vector,
                process_products_vector,
                process_sales_orders_vector);
            return "Operation complete.";
        } catch (ex) {
            // Log any errors
            console.error(ex);
        } finally {
            // Always release the connection, even on failure
            await client.close();
        }
    }

    // Return the vector search prompt if the user selected option 3
    if (userInput === "3") {
        return Searches.vectorSearchPrompt();
    }

    // Return the GPT search prompt if user selected option 4
    if (userInput === "4") {
        return Searches.GPTSearchPrompt();
    }
}

// Runs a GPT (RAG) search for the given user question and returns the
// completion text, or undefined if the search failed (error is logged).
async function doGPTSearch(userInput) {

    // Connect to MongoDB server
    const client = new MongoClient(cosmosdb_connection_string);
    await client.connect();

    try {
        const searchResult = await Searches.runGPTSearch(
            embeddings_deployment,
            AzureOpenAIClient,
            completion_deployment,
            client,
            cosmos_db_mongodb_database,
            userInput);
        return searchResult;
    } catch (ex) {
        // Log any errors
        console.error(ex);
    } finally {
        // Always release the connection, even on failure
        await client.close();
    }
}

// Runs a vector search for the given user query and returns an array of
// formatted result strings, or undefined if the search failed (error is logged).
async function doVectorSearch(userInput) {

    // Connect to MongoDB server
    const client = new MongoClient(cosmosdb_connection_string);
    await client.connect();

    try {
        const searchResult = await Searches.runVectorSearch(
            embeddings_deployment,
            AzureOpenAIClient,
            client,
            cosmos_db_mongodb_database,
            userInput);
        return searchResult;
    } catch (ex) {
        // Log any errors
        console.error(ex);
    } finally {
        // Always release the connection, even on failure
        await client.close();
    }
}

// Export the functions consumed by app.js (processOption, doVectorSearch,
// doGPTSearch) and by the /getOptions endpoint (getOptions). The original
// left this as "// TODO: Export functions", so app.js's calls would fail.
module.exports.getOptions = getOptions;
module.exports.processOption = processOption;
module.exports.doVectorSearch = doVectorSearch;
module.exports.doGPTSearch = doGPTSearch;
https://raw.githubusercontent.com/MicrosoftLearning/mslearn-cosmosdb-mongodb-vcore/1524bda43a20bd317dadcf3dcaa9a8fa217a9d48/05-deploy-with-aks/node.js/public/images/bike1.png -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/public/images/bike2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrosoftLearning/mslearn-cosmosdb-mongodb-vcore/1524bda43a20bd317dadcf3dcaa9a8fa217a9d48/05-deploy-with-aks/node.js/public/images/bike2.png -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/public/images/bike3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrosoftLearning/mslearn-cosmosdb-mongodb-vcore/1524bda43a20bd317dadcf3dcaa9a8fa217a9d48/05-deploy-with-aks/node.js/public/images/bike3.png -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/public/images/bike4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrosoftLearning/mslearn-cosmosdb-mongodb-vcore/1524bda43a20bd317dadcf3dcaa9a8fa217a9d48/05-deploy-with-aks/node.js/public/images/bike4.png -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/public/images/bike5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrosoftLearning/mslearn-cosmosdb-mongodb-vcore/1524bda43a20bd317dadcf3dcaa9a8fa217a9d48/05-deploy-with-aks/node.js/public/images/bike5.png -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/public/images/bike6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MicrosoftLearning/mslearn-cosmosdb-mongodb-vcore/1524bda43a20bd317dadcf3dcaa9a8fa217a9d48/05-deploy-with-aks/node.js/public/images/bike6.png -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/public/images/bike7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrosoftLearning/mslearn-cosmosdb-mongodb-vcore/1524bda43a20bd317dadcf3dcaa9a8fa217a9d48/05-deploy-with-aks/node.js/public/images/bike7.png -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/public/images/bike8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrosoftLearning/mslearn-cosmosdb-mongodb-vcore/1524bda43a20bd317dadcf3dcaa9a8fa217a9d48/05-deploy-with-aks/node.js/public/images/bike8.png -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/public/images/bike9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrosoftLearning/mslearn-cosmosdb-mongodb-vcore/1524bda43a20bd317dadcf3dcaa9a8fa217a9d48/05-deploy-with-aks/node.js/public/images/bike9.png -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | AI Vector Search 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 | 16 | 24 |
25 | 26 | 29 | 30 |
31 |

Start Your Adventure

32 |
33 |
34 |
35 | Bike 1 36 |
Touring-1000
37 |
38 |
39 | 40 |
41 |
42 | Bike 2 43 |
Touring-3000
44 |
45 |
46 | 47 |
48 |
49 | Bike 3 50 |
Road-350
51 |
52 |
53 | 54 |
55 |
56 | Bike 4 57 |
Road-220
58 |
59 |
60 | 61 |
62 |
63 | Bike 5 64 |
Road-450
65 |
66 |
67 | 68 |
69 |
70 | Bike 6 71 |
Touring-1200
72 |
73 |
74 | 75 |
76 |
77 | Bike 8 78 |
Touring-2000
79 |
80 |
81 | 82 |
83 |
84 | Bike 9 85 |
Road-650
86 |
87 |
88 |
89 |
90 | 91 |
92 |
93 |

Virtual Assistant

94 |
95 | 98 | 101 |
102 |
103 |
104 | 105 |
106 |
107 | 108 | 109 |
110 |
111 | 112 |
113 |

© 2024 Contoso Bike Shop. All rights reserved.

114 |
115 | 116 | 117 | -------------------------------------------------------------------------------- /05-deploy-with-aks/node.js/public/options.js: -------------------------------------------------------------------------------- 1 | class OptionsComponent extends HTMLElement { 2 | constructor() { 3 | super(); 4 | } 5 | 6 | // This method runs when the element is inserted into the DOM 7 | connectedCallback() { 8 | this.loadOptions(); 9 | } 10 | 11 | // Fetch options from the server and display them 12 | async loadOptions() { 13 | 14 | try { 15 | const response = await fetch('/getOptions'); 16 | const options = await response.json(); 17 | 18 | const promptWrapper = $('
', { class: 'message assistant' }); 19 | const header = $('
', { class: 'header' }); 20 | 21 | // Create the robot icon element 22 | const robotIcon = $('', { 23 | class: 'bi bi-robot', 24 | 'aria-hidden': 'true' 25 | }); 26 | 27 | // Append the icon and the text to the header 28 | header.append(robotIcon).append(' Assistant'); 29 | 30 | const promptText = $('
', { class: 'text' }).text('Please select an option:'); 31 | 32 | // Create the structure of the custom component 33 | const wrapper = $('
', { class: 'message assistant options-wrapper' }); 34 | 35 | options.forEach((option, index) => { 36 | const button = $(' 98 | 101 |
102 |
103 |
104 | 105 |
106 |
107 | 108 | 109 |
110 |
111 | 112 |
113 |

© 2024 Contoso Bike Shop. All rights reserved.

114 |
115 | 116 | 117 | -------------------------------------------------------------------------------- /06-guided-project/node.js/public/options.js: -------------------------------------------------------------------------------- 1 | class OptionsComponent extends HTMLElement { 2 | constructor() { 3 | super(); 4 | } 5 | 6 | // This method runs when the element is inserted into the DOM 7 | connectedCallback() { 8 | this.loadOptions(); 9 | } 10 | 11 | // Fetch options from the server and display them 12 | async loadOptions() { 13 | 14 | try { 15 | const response = await fetch('/getOptions'); 16 | const options = await response.json(); 17 | 18 | const promptWrapper = $('
', { class: 'message assistant' }); 19 | const header = $('
', { class: 'header' }); 20 | 21 | // Create the robot icon element 22 | const robotIcon = $('', { 23 | class: 'bi bi-robot', 24 | 'aria-hidden': 'true' 25 | }); 26 | 27 | // Append the icon and the text to the header 28 | header.append(robotIcon).append(' Assistant'); 29 | 30 | const promptText = $('
', { class: 'text' }).text('Please select an option:'); 31 | 32 | // Create the structure of the custom component 33 | const wrapper = $('
', { class: 'message assistant options-wrapper' }); 34 | 35 | options.forEach((option, index) => { 36 | const button = $(' 16 |
17 | {% for language in site.data.languages %} {{ language.lang }}{% endfor %} 18 |
19 |
20 | 21 | 🗩 22 | 23 | 24 |
25 | 26 |
27 |
28 |
29 |
30 | {{ content }} 31 |
32 |
33 |
34 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /_layouts/page.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | --- 4 | 5 | 6 | 26 |
27 |
28 |
29 | 35 |
36 |
37 | {{ content }} 38 |
39 |
40 |
41 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /_sass/code.scss: -------------------------------------------------------------------------------- 1 | pre code { 2 | border: 0; 3 | padding: 0; 4 | display: block; 5 | } 6 | 7 | p code { 8 | font-size: 0.8rem; 9 | } 10 | 11 | code, 12 | pre { 13 | font-size: .875rem; 14 | background-color: #f9f9f9; 15 | } 16 | 17 | code { 18 | border-radius: 2px; 19 | border: 1px solid #d3d6db; 20 | display: inline-block; 21 | padding: 0 7px; 22 | } 23 | 24 | code.hljs { 25 | background-color: transparent !important; 26 | } 27 | 28 | pre { 29 | overflow: auto; 30 | padding: 16px; 31 | margin: 16px 0 0; 32 | direction: ltr; 33 | white-space: pre; 34 | word-spacing: normal; 35 | word-break: normal; 36 | word-wrap: normal; 37 | -moz-tab-size: 4; 38 | -o-tab-size: 4; 39 | tab-size: 4; 40 | -webkit-hyphens: none; 41 | -ms-hyphens: none; 42 | hyphens: none; 43 | -webkit-overflow-scrolling: touch; 44 | border: 1px solid #d3d6db; 45 | } 46 | -------------------------------------------------------------------------------- /assets/Microsoft-logo_rgb_c-wht.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrosoftLearning/mslearn-cosmosdb-mongodb-vcore/1524bda43a20bd317dadcf3dcaa9a8fa217a9d48/assets/Microsoft-logo_rgb_c-wht.png -------------------------------------------------------------------------------- /assets/css/style.scss: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 | @import "theme"; 5 | -------------------------------------------------------------------------------- /assets/js/script.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | $('article img').each(function() { 3 | var src = $(this).attr('src'); 4 | $(this).wrap(''); 5 | }); 6 | $('article > h2').each(function() { 7 | 
$('nav.toc > ul').append( 8 | $('
  • ') 9 | .attr('class', 'nav-item') 10 | .append( 11 | $('') 12 | .attr('class', 'nav-link') 13 | .text($(this).text()) 14 | .attr('href', '#' + $(this).attr('id')) 15 | ) 16 | ); 17 | }); 18 | $('[data-spy="scroll"]').each(function () { 19 | var $spy = $(this).scrollspy('refresh') 20 | }); 21 | $('pre').each(function(index) { 22 | var generatedId = 'codeBlock' + index; 23 | var languageClass = $(this).children('code:first').attr('class').split(' ')[0]; 24 | var language = languageClass == 'language-sh' ? 'shell' : 25 | languageClass == 'language-js' ? 'javascript' : 26 | languageClass == 'language-xml' ? 'xml' : 27 | languageClass == 'language-sql' ? 'sql' : 28 | languageClass == 'language-csharp' ? 'c#' : 'code'; 29 | $(this).attr('id', generatedId); 30 | var header = $('
    ', { 31 | class: 'code-header mt-3 mb-0 bg-light d-flex justify-content-between border', 32 | }).append( 33 | $('', { 34 | class: 'mx-2 text-muted text-capitalize font-weight-light', 35 | html: language 36 | }) 37 | ).append( 38 | $('