-
16 |
-
17 | Get started by editing{" "}
18 |
19 | app/page.js 20 |21 | . 22 |
23 | - Save and see your changes instantly. 24 |
├── Dockerfile ├── app ├── __init__.py ├── core │ ├── config.py │ └── __init__.py ├── db │ ├── __init__.py │ ├── database.py │ └── schemas.py ├── api │ ├── __init__.py │ └── endpoints.py ├── models │ ├── __init__.py │ └── embeddings.py ├── services │ ├── __init__.py │ ├── test.py │ └── nlp.py └── main.py ├── docker-compose.yml ├── tests ├── __init__.py ├── test_api.py └── test_services.py ├── aisearch_cdk ├── tests │ ├── __init__.py │ └── unit │ │ ├── __init__.py │ │ └── test_aisearch_cdk_stack.py ├── aisearch_cdk │ ├── __init__.py │ └── aisearch_cdk_stack.py ├── requirements-dev.txt ├── requirements.txt ├── .gitignore ├── source.bat ├── app.py ├── README.md └── cdk.json ├── frontend ├── app │ ├── favicon.ico │ ├── globals.css │ ├── layout.js │ └── page.js ├── jsconfig.json ├── next.config.mjs ├── public │ ├── vercel.svg │ ├── window.svg │ ├── file.svg │ ├── globe.svg │ └── next.svg ├── postcss.config.mjs ├── tailwind.config.js ├── tailwind.config.mjs ├── package.json ├── .gitignore └── README.md ├── .gitignore ├── requirements.txt ├── README.md └── scripts └── data_loader.py /Dockerfile: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/core/config.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/db/database.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /app/db/schemas.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_services.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/models/embeddings.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/aisearch_cdk/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aisearch_cdk/aisearch_cdk/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aisearch_cdk/tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aisearch_cdk/requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pytest==6.2.5 2 | -------------------------------------------------------------------------------- /aisearch_cdk/requirements.txt: -------------------------------------------------------------------------------- 1 | aws-cdk-lib==2.162.1 2 | constructs>=10.0.0,<11.0.0 3 | -------------------------------------------------------------------------------- /frontend/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runtime/ai-search-drift-measuring/master/frontend/app/favicon.ico -------------------------------------------------------------------------------- /frontend/jsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "paths": { 4 | "@/*": ["./*"] 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /frontend/next.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = {}; 3 | 4 | export default nextConfig; 5 | -------------------------------------------------------------------------------- /frontend/public/vercel.svg: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('postcss-load-config').Config} */ 2 | const config = { 3 | plugins: { 4 | tailwindcss: {}, 5 | }, 6 | }; 7 | 8 | export default config; 9 | -------------------------------------------------------------------------------- /frontend/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: [], 4 | theme: { 5 | extend: {}, 6 | }, 7 | plugins: [], 8 | } 9 | 10 | -------------------------------------------------------------------------------- /aisearch_cdk/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | package-lock.json 3 | __pycache__ 4 | .pytest_cache 5 | .venv 6 | *.egg-info 7 | 8 | .venv/ 9 | 10 | # CDK asset staging directory 11 | .cdk.staging 12 | cdk.out 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | package-lock.json 3 | __pycache__ 4 | .pytest_cache 5 | .venv 6 | *.egg-info 7 | 8 | .venv/ 9 | 10 | # CDK asset staging directory 11 | .cdk.staging 12 | cdk.out 13 | 14 | .idea/ 15 | .idea 16 | 17 | .DS_STORE 18 | 19 | .env 20 | .env.production 21 | .env.stage 22 | .env.dev 23 | -------------------------------------------------------------------------------- /app/services/test.py: -------------------------------------------------------------------------------- 1 | from sentence_transformers import SentenceTransformer 2 | import tensorflow as tf 3 | 4 | print("TensorFlow version:", tf.__version__) 5 | print("NumPy version:", tf.__version__) 6 | 7 
| model = SentenceTransformer('all-MiniLM-L6-v2') 8 | print("SentenceTransformer model loaded successfully!") 9 | -------------------------------------------------------------------------------- /frontend/public/window.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from app.api import endpoints # Import your API endpoints 3 | 4 | print("Main module loaded!") 5 | 6 | # Initialize FastAPI app 7 | app = FastAPI() 8 | 9 | # Include API routers 10 | app.include_router(endpoints.router) 11 | 12 | # Define a basic root endpoint 13 | @app.get("/") 14 | def read_root(): 15 | return {"message": "Welcome to the AI Search Drift Measuring API!"} 16 | -------------------------------------------------------------------------------- /frontend/app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | --background: #ffffff; 7 | --foreground: #171717; 8 | } 9 | 10 | @media (prefers-color-scheme: dark) { 11 | :root { 12 | --background: #0a0a0a; 13 | --foreground: #ededed; 14 | } 15 | } 16 | 17 | body { 18 | color: var(--foreground); 19 | background: var(--background); 20 | font-family: Arial, Helvetica, sans-serif; 21 | } 22 | -------------------------------------------------------------------------------- /frontend/tailwind.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: [ 4 | 
"./pages/**/*.{js,ts,jsx,tsx,mdx}", 5 | "./components/**/*.{js,ts,jsx,tsx,mdx}", 6 | "./app/**/*.{js,ts,jsx,tsx,mdx}", 7 | ], 8 | theme: { 9 | extend: { 10 | colors: { 11 | background: "var(--background)", 12 | foreground: "var(--foreground)", 13 | }, 14 | }, 15 | }, 16 | plugins: [], 17 | }; 18 | -------------------------------------------------------------------------------- /aisearch_cdk/source.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | rem The sole purpose of this script is to make the command 4 | rem 5 | rem source .venv/bin/activate 6 | rem 7 | rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows. 8 | rem On Windows, this command just runs this batch file (the argument is ignored). 9 | rem 10 | rem Now we don't need to document a Windows command for activating a virtualenv. 11 | 12 | echo Executing .venv\Scripts\activate.bat for you 13 | .venv\Scripts\activate.bat 14 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --turbopack", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "next": "15.1.3", 13 | "react": "^19.0.0", 14 | "react-dom": "^19.0.0" 15 | }, 16 | "devDependencies": { 17 | "autoprefixer": "^10.4.20", 18 | "postcss": "^8.4.49", 19 | "tailwindcss": "^3.4.17" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /aisearch_cdk/aisearch_cdk/aisearch_cdk_stack.py: -------------------------------------------------------------------------------- 1 | from aws_cdk import ( 2 | # Duration, 3 | Stack, 4 | # aws_sqs as sqs, 5 | ) 6 | from constructs import Construct 7 | 8 | class AisearchCdkStack(Stack): 9 | 10 | 
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: 11 | super().__init__(scope, construct_id, **kwargs) 12 | 13 | # The code that defines your stack goes here 14 | 15 | # example resource 16 | # queue = sqs.Queue( 17 | # self, "AisearchCdkQueue", 18 | # visibility_timeout=Duration.seconds(300), 19 | # ) 20 | -------------------------------------------------------------------------------- /aisearch_cdk/tests/unit/test_aisearch_cdk_stack.py: -------------------------------------------------------------------------------- 1 | import aws_cdk as core 2 | import aws_cdk.assertions as assertions 3 | 4 | from aisearch_cdk.aisearch_cdk_stack import AisearchCdkStack 5 | 6 | # example tests. To run these tests, uncomment this file along with the example 7 | # resource in aisearch_cdk/aisearch_cdk_stack.py 8 | def test_sqs_queue_created(): 9 | app = core.App() 10 | stack = AisearchCdkStack(app, "aisearch-cdk") 11 | template = assertions.Template.from_stack(stack) 12 | 13 | # template.has_resource_properties("AWS::SQS::Queue", { 14 | # "VisibilityTimeout": 300 15 | # }) 16 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /frontend/app/layout.js: -------------------------------------------------------------------------------- 1 | import { Geist, Geist_Mono } from "next/font/google"; 2 | import "./globals.css"; 3 | 4 | const geistSans = Geist({ 5 | variable: "--font-geist-sans", 6 | subsets: ["latin"], 7 | }); 8 | 9 | const geistMono = Geist_Mono({ 10 | variable: "--font-geist-mono", 11 | subsets: ["latin"], 12 | }); 13 | 14 | export const metadata = { 15 | title: "Create Next App", 16 | description: "Generated by create next app", 17 | }; 18 | 19 | export default function RootLayout({ children }) { 20 | return ( 21 | 22 |
25 | {children} 26 | 27 | 28 | ); 29 | } 30 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.7.0 3 | certifi==2024.12.14 4 | charset-normalizer==3.4.1 5 | click==8.1.8 6 | exceptiongroup==1.2.2 7 | fastapi==0.115.6 8 | filelock==3.16.1 9 | fsspec==2024.12.0 10 | h11==0.14.0 11 | huggingface-hub==0.27.0 12 | idna==3.10 13 | Jinja2==3.1.5 14 | joblib==1.4.2 15 | MarkupSafe==3.0.2 16 | mpmath==1.3.0 17 | networkx==3.2.1 18 | numpy==2.0.2 19 | packaging==24.2 20 | pillow==11.0.0 21 | psycopg2==2.9.10 22 | pydantic==2.10.4 23 | pydantic_core==2.27.2 24 | PyYAML==6.0.2 25 | regex==2024.11.6 26 | requests==2.32.3 27 | safetensors==0.4.5 28 | scikit-learn==1.6.0 29 | scipy==1.13.1 30 | sentence-transformers==3.3.1 31 | sniffio==1.3.1 32 | starlette==0.41.3 33 | sympy==1.13.1 34 | threadpoolctl==3.5.0 35 | tokenizers==0.21.0 36 | torch==2.5.1 37 | tqdm==4.67.1 38 | transformers==4.47.1 39 | typing_extensions==4.12.2 40 | urllib3==2.3.0 41 | uvicorn==0.34.0 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI Search Drift Detection and monitoring system 2 | 3 | ### Python application that measures drift on a model's responses to user queries. 4 | 5 | This Python application is a framework for drift detection on an LLM and consequential messaging for low tensor scores. 6 | 7 | 8 | ``` 9 | Stack: 10 | >sentence-transformers 11 | >fastapi 12 | >psycopg2 13 | >postgresql 14 | >tbd messaging service 15 | >next.js 16 | >tailwind 17 | ``` 18 | 19 | Note: Current build does not use an LLM and does not directly take user queries; instead, it is a placeholder for the end-to-end process being established in an MVP.
20 | 21 | Todo: messaging, docker container, implement next.js and tailwind, drift data viz, add actual llm to detect drift against 22 | 23 | 24 | ##### to use: 25 | ``` 26 | uvicorn app.main:app --reload 27 | ``` 28 | 29 | ##### load: 30 | ``` 31 | http://127.0.0.1:8000 32 | ``` 33 | 34 | ##### returns 35 | ``` 36 | {"message": "Welcome to the AI Search Drift Measuring API!"} 37 | 38 | ``` -------------------------------------------------------------------------------- /frontend/public/globe.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /aisearch_cdk/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | 4 | import aws_cdk as cdk 5 | 6 | from aisearch_cdk.aisearch_cdk_stack import AisearchCdkStack 7 | 8 | 9 | app = cdk.App() 10 | AisearchCdkStack(app, "AisearchCdkStack", 11 | # If you don't specify 'env', this stack will be environment-agnostic. 12 | # Account/Region-dependent features and context lookups will not work, 13 | # but a single synthesized template can be deployed anywhere. 14 | 15 | # Uncomment the next line to specialize this stack for the AWS Account 16 | # and Region that are implied by the current CLI configuration. 17 | 18 | #env=cdk.Environment(account=os.getenv('CDK_DEFAULT_ACCOUNT'), region=os.getenv('CDK_DEFAULT_REGION')), 19 | 20 | # Uncomment the next line if you know exactly what Account and Region you 21 | # want to deploy the stack to. 
*/ 22 | 23 | #env=cdk.Environment(account='123456789012', region='us-east-1'), 24 | 25 | # For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html 26 | ) 27 | 28 | app.synth() 29 | -------------------------------------------------------------------------------- /frontend/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app). 2 | 3 | ## Getting Started 4 | 5 | First, run the development server: 6 | 7 | ```bash 8 | npm run dev 9 | # or 10 | yarn dev 11 | # or 12 | pnpm dev 13 | # or 14 | bun dev 15 | ``` 16 | 17 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 18 | 19 | You can start editing the page by modifying `app/page.js`. The page auto-updates as you edit the file. 20 | 21 | This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel. 22 | 23 | ## Learn More 24 | 25 | To learn more about Next.js, take a look at the following resources: 26 | 27 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 28 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 29 | 30 | You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome! 
31 | 32 | ## Deploy on Vercel 33 | 34 | The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. 35 | 36 | Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details. 37 | -------------------------------------------------------------------------------- /aisearch_cdk/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Welcome to your CDK Python project! 3 | 4 | This is a blank project for CDK development with Python. 5 | 6 | The `cdk.json` file tells the CDK Toolkit how to execute your app. 7 | 8 | This project is set up like a standard Python project. The initialization 9 | process also creates a virtualenv within this project, stored under the `.venv` 10 | directory. To create the virtualenv it assumes that there is a `python3` 11 | (or `python` for Windows) executable in your path with access to the `venv` 12 | package. If for any reason the automatic creation of the virtualenv fails, 13 | you can create the virtualenv manually. 14 | 15 | To manually create a virtualenv on macOS and Linux: 16 | 17 | ``` 18 | $ python3 -m venv .venv 19 | ``` 20 | 21 | After the init process completes and the virtualenv is created, you can use the following 22 | step to activate your virtualenv. 23 | 24 | ``` 25 | $ source .venv/bin/activate 26 | ``` 27 | 28 | If you are on a Windows platform, you would activate the virtualenv like this: 29 | 30 | ``` 31 | % .venv\Scripts\activate.bat 32 | ``` 33 | 34 | Once the virtualenv is activated, you can install the required dependencies. 35 | 36 | ``` 37 | $ pip install -r requirements.txt 38 | ``` 39 | 40 | At this point you can now synthesize the CloudFormation template for this code.
41 | 42 | ``` 43 | $ cdk synth 44 | ``` 45 | 46 | To add additional dependencies, for example other CDK libraries, just add 47 | them to your `requirements.txt` file and rerun the `pip install -r requirements.txt` 48 | command. 49 | 50 | ## Useful commands 51 | 52 | * `cdk ls` list all stacks in the app 53 | * `cdk synth` emits the synthesized CloudFormation template 54 | * `cdk deploy` deploy this stack to your default AWS account/region 55 | * `cdk diff` compare deployed stack with current state 56 | * `cdk docs` open CDK documentation 57 | 58 | Enjoy! 59 | -------------------------------------------------------------------------------- /scripts/data_loader.py: -------------------------------------------------------------------------------- 1 | import psycopg2 2 | import pickle 3 | from sentence_transformers import SentenceTransformer 4 | import nltk 5 | nltk.download('punkt_tab') 6 | from nltk.tokenize import sent_tokenize 7 | 8 | # Define the corpus 9 | example = """In computer science, lexical analysis, lexing or tokenization is the process of converting a sequence of characters (such as in a computer program or web page) into a sequence of lexical tokens (strings with an assigned and thus identified meaning). 10 | A program that performs lexical analysis may be termed a lexer, tokenizer, or scanner, although scanner is also a term for the first stage of a lexer.
A lexer is generally combined with a parser, which together analyze the syntax of programming languages, web pages, and so forth.""" 11 | 12 | # Split the sentences into a list 13 | #corpus = example.splitlines() 14 | 15 | #tokenize the text into sentences using nltk sent_tokenize 16 | corpus = sent_tokenize(example) 17 | 18 | print(f"Stored sentence: {corpus}") 19 | 20 | # Connect to PostgreSQL 21 | conn = psycopg2.connect( 22 | dbname="aisearch", 23 | user="home", 24 | password="12345", 25 | host="localhost", 26 | port="5432" 27 | ) 28 | cursor = conn.cursor() 29 | 30 | # Load the model 31 | model = SentenceTransformer('all-MiniLM-L6-v2') 32 | 33 | # Add corpus to the database 34 | for sentence in corpus: 35 | try: 36 | # Encode the sentence 37 | embedding = model.encode(sentence) 38 | serialized_embedding = pickle.dumps(embedding) 39 | 40 | print(f"Storing sentence: {sentence}") 41 | 42 | # Insert into database 43 | cursor.execute( 44 | "INSERT INTO embeddings (text, embedding) VALUES (%s, %s)", 45 | (sentence, serialized_embedding) 46 | ) 47 | 48 | print(f"Stored sentence: {sentence}") 49 | except Exception as e: 50 | print(f"Error storing sentence: {sentence} -> {str(e)}") 51 | continue 52 | 53 | # Commit and close connection 54 | conn.commit() 55 | cursor.close() 56 | conn.close() 57 | 58 | print("Corpus added to the database!") 59 | -------------------------------------------------------------------------------- /app/services/nlp.py: -------------------------------------------------------------------------------- 1 | 2 | #get_ipython().system('pip install -U sentence-transformers') 3 | from sentence_transformers import util 4 | 5 | 6 | # Define a passage of text. 7 | example_text = """In computer science, lexical analysis, lexing or tokenization is the process of converting a sequence of characters (such as in a computer program or web page) into a sequence of lexical tokens (strings with an assigned and thus identified meaning). 
8 | A program that performs lexical analysis may be termed a lexer, tokenizer, or scanner, although scanner is also a term for the first stage of a lexer. 9 | A lexer is generally combined with a parser, which together analyze the syntax of programming languages, web pages, and so forth. 10 | """ 11 | 12 | # Split the sentences in the text. 13 | sentences = example_text.splitlines() 14 | #sentences 15 | 16 | 17 | # Import the SentenceTransformer class from the sentence_transformers module and use the `all-MiniLM-L6-v2` model. 18 | from sentence_transformers import SentenceTransformer 19 | model = SentenceTransformer('all-MiniLM-L6-v2') 20 | 21 | 22 | # Get the vector embeddings for the sentences. 23 | search_index = model.encode(sentences) 24 | #print(search_index) 25 | 26 | 27 | 28 | # Each sentence has its own vector. 29 | print(len(search_index)) 30 | # Get the vector length for the first sentence 31 | print(len(search_index[0])) 32 | 33 | #start 34 | # Define potential answers to the query 35 | potential_answers = [ 36 | "Transformers are important for AI search.", 37 | "Tokenization converts text into tokens.", 38 | "Attention mechanisms are used in transformers." 39 | ] 40 | 41 | # Create a query and encode the query with the model. 42 | 43 | query = "Why are transformers important for search?" 44 | query_embedding = model.encode([query]) 45 | answers_embeddings = model.encode(potential_answers) 46 | 47 | # Loop through each potential answer and calculate cosine similarity 48 | for i, answer_embedding in enumerate(answers_embeddings): 49 | cosine_similarity_score = util.cos_sim(answer_embedding, query_embedding) 50 | print(f"Query: {query}") 51 | print(f"Potential Answer {i + 1}: {potential_answers[i]}") 52 | print(f"Similarity score: {cosine_similarity_score.item()}") 53 | print() 54 | 55 | 56 | 57 | 58 | # Loop through the sentence embeddings and compare each sentence embedding with our query embedding. 
59 | # for i in range(len(search_index)): 60 | # index_embedding = search_index[i] 61 | # cosine_similarity_score = util.cos_sim(index_embedding, search_query) 62 | # print(f"Query: {query}") 63 | # print(f"Sentence {i+1}: {sentences[i]}") 64 | # print(f"Similarity score: {cosine_similarity_score}") 65 | # print() 66 | 67 | 68 | -------------------------------------------------------------------------------- /aisearch_cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "python3 app.py", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "requirements*.txt", 11 | "source.bat", 12 | "**/__init__.py", 13 | "**/__pycache__", 14 | "tests" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 19 | "@aws-cdk/core:checkSecretUsage": true, 20 | "@aws-cdk/core:target-partitions": [ 21 | "aws", 22 | "aws-cn" 23 | ], 24 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 25 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 26 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 27 | "@aws-cdk/aws-iam:minimizePolicies": true, 28 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 29 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 30 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 31 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 32 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 33 | "@aws-cdk/core:enablePartitionLiterals": true, 34 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 35 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 36 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 37 | "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 38 | "@aws-cdk/aws-route53-patters:useCertificate": true, 39 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 40 | 
"@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 41 | "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, 42 | "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, 43 | "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, 44 | "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, 45 | "@aws-cdk/aws-redshift:columnId": true, 46 | "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, 47 | "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, 48 | "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, 49 | "@aws-cdk/aws-kms:aliasNameRef": true, 50 | "@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true, 51 | "@aws-cdk/core:includePrefixInUniqueNameGeneration": true, 52 | "@aws-cdk/aws-efs:denyAnonymousAccess": true, 53 | "@aws-cdk/aws-opensearchservice:enableOpensearchMultiAzWithStandby": true, 54 | "@aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion": true, 55 | "@aws-cdk/aws-efs:mountTargetOrderInsensitiveLogicalId": true, 56 | "@aws-cdk/aws-rds:auroraClusterChangeScopeOfInstanceParameterGroupWithEachParameters": true, 57 | "@aws-cdk/aws-appsync:useArnForSourceApiAssociationIdentifier": true, 58 | "@aws-cdk/aws-rds:preventRenderingDeprecatedCredentials": true, 59 | "@aws-cdk/aws-codepipeline-actions:useNewDefaultBranchForCodeCommitSource": true, 60 | "@aws-cdk/aws-cloudwatch-actions:changeLambdaPermissionLogicalIdForLambdaAction": true, 61 | "@aws-cdk/aws-codepipeline:crossAccountKeysDefaultValueToFalse": true, 62 | "@aws-cdk/aws-codepipeline:defaultPipelineTypeToV2": true, 63 | "@aws-cdk/aws-kms:reduceCrossAccountRegionPolicyScope": true, 64 | "@aws-cdk/aws-eks:nodegroupNameAttribute": true, 65 | "@aws-cdk/aws-ec2:ebsDefaultGp3Volume": true, 66 | "@aws-cdk/aws-ecs:removeDefaultDeploymentAlarm": true, 67 | "@aws-cdk/custom-resources:logApiResponseDataPropertyTrueDefault": false, 68 | "@aws-cdk/aws-s3:keepNotificationInImportedBucket": false, 69 | 
"@aws-cdk/aws-ecs:reduceEc2FargateCloudWatchPermissions": true, 70 | "@aws-cdk/aws-ec2:ec2SumTImeoutEnabled": true, 71 | "@aws-cdk/aws-appsync:appSyncGraphQLAPIScopeLambdaPermission": true, 72 | "@aws-cdk/aws-rds:setCorrectValueForDatabaseInstanceReadReplicaInstanceResourceId": true, 73 | "@aws-cdk/core:cfnIncludeRejectComplexResourceUpdateCreatePolicyIntrinsics": true, 74 | "@aws-cdk/aws-lambda-nodejs:sdkV3ExcludeSmithyPackages": true 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /frontend/app/page.js: -------------------------------------------------------------------------------- 1 | import Image from "next/image"; 2 | 3 | export default function Home() { 4 | return ( 5 |
19 | app/page.js
20 |
21 | .
22 |