├── .env.example ├── .eslintrc.json ├── .gitignore ├── README.md ├── next.config.mjs ├── package-lock.json ├── package.json ├── postcss.config.cjs ├── prettier.config.cjs ├── prisma └── schema.prisma ├── public └── favicon.ico ├── semantic-search-openai-pinecone-input.png ├── semantic-search-openai-pinecone-query.png ├── semantic-search-openai-pinecone.png ├── src ├── components │ ├── Library.tsx │ ├── Loading.tsx │ ├── Main.tsx │ └── Search.tsx ├── env │ ├── client.mjs │ ├── schema.mjs │ └── server.mjs ├── pages │ ├── _app.tsx │ ├── api │ │ ├── auth │ │ │ └── [...nextauth].ts │ │ ├── examples.ts │ │ ├── restricted.ts │ │ └── trpc │ │ │ └── [trpc].ts │ └── index.tsx ├── server │ ├── common │ │ └── get-server-auth-session.ts │ ├── db │ │ └── client.ts │ └── trpc │ │ ├── context.ts │ │ ├── router │ │ ├── _app.ts │ │ ├── auth.ts │ │ ├── library.ts │ │ └── openai-pinecone.ts │ │ └── trpc.ts ├── styles │ └── globals.css ├── types │ └── next-auth.d.ts └── utils │ ├── openai.ts │ ├── pinecone.ts │ └── trpc.ts ├── tailwind.config.cjs └── tsconfig.json /.env.example: -------------------------------------------------------------------------------- 1 | # Since .env is gitignored, you can use .env.example to build a new `.env` file when you clone the repo. 2 | # Keep this file up-to-date when you add new variables to `.env`. 3 | 4 | # This file will be committed to version control, so make sure not to have any secrets in it. 5 | # If you are cloning this repo, create a copy of this file named `.env` and populate it with your secrets. 
6 | 7 | # When adding additional env variables, the schema in /env/schema.mjs should be updated accordingly 8 | 9 | # Prisma 10 | # You can either use PlanetScale(MySQL) or neon.tech (Postgres); for neon, sample endpoint would be postgres://mharrvic:xxxxx@ep-xx-xxxx-xxxxx.ap-southeast-1.aws.neon.tech/search 11 | DATABASE_URL= 12 | 13 | # Next Auth 14 | # You can generate the secret via 'openssl rand -base64 32' on Linux 15 | # More info: https://next-auth.js.org/configuration/options#secret 16 | NEXTAUTH_SECRET= 17 | 18 | # Update this endpoint if deployed 19 | NEXTAUTH_URL=http://localhost:3000 20 | 21 | # Next Auth Google Provider 22 | GOOGLE_CLIENT_ID= 23 | GOOGLE_CLIENT_SECRET= 24 | 25 | # https://beta.openai.com/account/api-keys 26 | OPENAI_API_KEY= 27 | 28 | # https://app.pinecone.io/organizations 29 | PINECONE_API_KEY= 30 | 31 | # Create an index first; put 1536 for dimension; copy the endpoint from the index dashboard, something like semantic-search-xxxxxx.svc.us-west1-gcp.pinecone.io 32 | PINECONE_BASE_URL= 33 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "@typescript-eslint/parser", 3 | "parserOptions": { 4 | "project": "./tsconfig.json" 5 | }, 6 | "plugins": ["@typescript-eslint"], 7 | "extends": ["next/core-web-vitals", "plugin:@typescript-eslint/recommended"], 8 | "rules": { 9 | "@typescript-eslint/consistent-type-imports": "warn" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # database 12 | /prisma/db.sqlite 13 | /prisma/db.sqlite-journal 14 | 15 | # next.js 16 | /.next/ 17 | /out/ 18 | next-env.d.ts 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # local env files 34 | # do not commit any .env files to git, except for the .env.example file. https://create.t3.gg/en/usage/env-variables#using-environment-variables 35 | .env 36 | .env*.local 37 | 38 | # vercel 39 | .vercel 40 | 41 | # typescript 42 | *.tsbuildinfo 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Semantic Search with OpenAI Embeddings and Pinecone 2 | 3 | Demo link: https://semantic-search.mharrvic.com 4 | 5 | You can use this https://public-semantic-search.mharrvic.com/ if you're having authentication issues. 6 | 7 | Repo link: [https://github.com/mharrvic/semantic-search-openai-pinecone](https://github.com/mharrvic/semantic-search-openai-pinecone) 8 | 9 | This is a demo app that shows how to use [OpenAI Embeddings](https://beta.openai.com/docs/guides/embeddings) and [Pinecone](https://pinecone.io) vector database to build a semantic search engine. Based on https://docs.pinecone.io/docs/semantic-text-search. You can explore this freely using the free tier of Pinecone and OpenAI. 
10 | 11 | ![semantic-search-openai-pinecone.png](semantic-search-openai-pinecone.png) 12 | 13 | ## Tech Stack 14 | 15 | - [Next.js](https://nextjs.org) 16 | - [NextAuth.js](https://next-auth.js.org) 17 | - [Prisma](https://prisma.io) 18 | - [Tailwind CSS](https://tailwindcss.com) 19 | - [tRPC](https://trpc.io) 20 | - [Pinecone vector db](https://pinecone.io) 21 | - [OpenAI Embeddings](https://beta.openai.com/docs/guides/embeddings) 22 | - [NeonDB serverless postgres db](https://neon.tech) 23 | 24 | 25 | ## Demo Video 26 | 27 | 28 | https://user-images.githubusercontent.com/15852818/209995984-25fefed6-956c-4c8e-90c5-e5a3db310c29.mp4 29 | 30 | 31 | 32 | https://user-images.githubusercontent.com/15852818/209996939-06a97708-7bed-4a8e-997a-7df99523727c.mp4 33 | 34 | 35 | 36 | 37 | ## Learn More 38 | 39 | 40 | ## Input record 41 | 42 | ![semantic-search-openai-pinecone-input.png](semantic-search-openai-pinecone-input.png) 43 | 44 | ### Query 45 | 46 | ![semantic-search-openai-pinecone-query.png](semantic-search-openai-pinecone-query.png) 47 | 48 | I recommend watching this [youtube video](https://www.youtube.com/watch?v=5MaWmXwxFNQ) from AssemblyAI to learn more about embeddings. 49 | -------------------------------------------------------------------------------- /next.config.mjs: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | /** 3 | * Run `build` or `dev` with `SKIP_ENV_VALIDATION` to skip env validation. 4 | * This is especially useful for Docker builds. 
5 | */ 6 | !process.env.SKIP_ENV_VALIDATION && (await import("./src/env/server.mjs")); 7 | 8 | /** @type {import("next").NextConfig} */ 9 | const config = { 10 | images: { 11 | domains: ["cdn.discordapp.com"], 12 | }, 13 | reactStrictMode: true, 14 | swcMinify: true, 15 | i18n: { 16 | locales: ["en"], 17 | defaultLocale: "en", 18 | }, 19 | }; 20 | export default config; 21 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "semantic-search", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "build": "next build", 7 | "dev": "next dev", 8 | "postinstall": "prisma generate", 9 | "lint": "next lint", 10 | "start": "next start" 11 | }, 12 | "dependencies": { 13 | "@headlessui/react": "^1.7.7", 14 | "@heroicons/react": "^2.0.13", 15 | "@next-auth/prisma-adapter": "^1.0.5", 16 | "@prisma/client": "^4.5.0", 17 | "@tailwindcss/forms": "^0.5.3", 18 | "@tanstack/react-query": "^4.16.0", 19 | "@trpc/client": "^10.0.0", 20 | "@trpc/next": "^10.0.0", 21 | "@trpc/react-query": "^10.0.0", 22 | "@trpc/server": "^10.0.0", 23 | "clsx": "^1.2.1", 24 | "next": "13.1.1", 25 | "next-auth": "^4.18.3", 26 | "openai": "^3.1.0", 27 | "pinecone-client": "^1.0.0", 28 | "react": "18.2.0", 29 | "react-dom": "18.2.0", 30 | "react-hook-form": "^7.41.2", 31 | "react-hot-toast": "^2.4.0", 32 | "superjson": "1.9.1", 33 | "ulid": "^2.3.0", 34 | "zod": "^3.18.0" 35 | }, 36 | "devDependencies": { 37 | "@types/node": "^18.0.0", 38 | "@types/prettier": "^2.7.2", 39 | "@types/react": "^18.0.14", 40 | "@types/react-dom": "^18.0.5", 41 | "@typescript-eslint/eslint-plugin": "^5.33.0", 42 | "@typescript-eslint/parser": "^5.33.0", 43 | "autoprefixer": "^10.4.7", 44 | "eslint": "^8.26.0", 45 | "eslint-config-next": "13.1.1", 46 | "postcss": "^8.4.14", 47 | "prettier": "^2.8.1", 48 | "prettier-plugin-tailwindcss": "^0.2.1", 49 | "prisma": "^4.5.0", 50 | 
"tailwindcss": "^3.2.0", 51 | "typescript": "^4.8.4" 52 | }, 53 | "ct3aMetadata": { 54 | "initVersion": "6.11.6" 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /postcss.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /prettier.config.cjs: -------------------------------------------------------------------------------- 1 | /** @type {import("prettier").Config} */ 2 | module.exports = { 3 | plugins: [require.resolve("prettier-plugin-tailwindcss")], 4 | }; 5 | -------------------------------------------------------------------------------- /prisma/schema.prisma: -------------------------------------------------------------------------------- 1 | // This is your Prisma schema file, 2 | // learn more about it in the docs: https://pris.ly/d/prisma-schema 3 | 4 | generator client { 5 | provider = "prisma-client-js" 6 | } 7 | 8 | datasource db { 9 | provider = "postgresql" 10 | // NOTE: When using postgresql, mysql or sqlserver, uncomment the @db.Text annotations in model Account below 11 | // Further reading: 12 | // https://next-auth.js.org/adapters/prisma#create-the-prisma-schema 13 | // https://www.prisma.io/docs/reference/api-reference/prisma-schema-reference#string 14 | url = env("DATABASE_URL") 15 | } 16 | 17 | model Example { 18 | id String @id @default(cuid()) 19 | createdAt DateTime @default(now()) 20 | updatedAt DateTime @updatedAt 21 | } 22 | 23 | // Necessary for Next auth 24 | model Account { 25 | id String @id @default(cuid()) 26 | userId String 27 | type String 28 | provider String 29 | providerAccountId String 30 | refresh_token String? // @db.Text 31 | access_token String? // @db.Text 32 | expires_at Int? 33 | token_type String? 34 | scope String? 35 | id_token String? 
// @db.Text 36 | session_state String? 37 | user User @relation(fields: [userId], references: [id], onDelete: Cascade) 38 | 39 | @@unique([provider, providerAccountId]) 40 | } 41 | 42 | model Session { 43 | id String @id @default(cuid()) 44 | sessionToken String @unique 45 | userId String 46 | expires DateTime 47 | user User @relation(fields: [userId], references: [id], onDelete: Cascade) 48 | } 49 | 50 | model User { 51 | id String @id @default(cuid()) 52 | name String? 53 | email String? @unique 54 | emailVerified DateTime? 55 | image String? 56 | accounts Account[] 57 | sessions Session[] 58 | libraries Library[] 59 | } 60 | 61 | model VerificationToken { 62 | identifier String 63 | token String @unique 64 | expires DateTime 65 | 66 | @@unique([identifier, token]) 67 | } 68 | 69 | model Library { 70 | id String @id @default(cuid()) 71 | embeddingId String 72 | createdAt DateTime @default(now()) 73 | updatedAt DateTime @updatedAt 74 | user User @relation(fields: [userId], references: [id]) 75 | userId String 76 | title String 77 | description String 78 | 79 | @@index([userId]) 80 | } 81 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mharrvic/semantic-search-openai-pinecone/83d0f20ee9a88c91a2f18973e1d5fcbe799f121d/public/favicon.ico -------------------------------------------------------------------------------- /semantic-search-openai-pinecone-input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mharrvic/semantic-search-openai-pinecone/83d0f20ee9a88c91a2f18973e1d5fcbe799f121d/semantic-search-openai-pinecone-input.png -------------------------------------------------------------------------------- /semantic-search-openai-pinecone-query.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mharrvic/semantic-search-openai-pinecone/83d0f20ee9a88c91a2f18973e1d5fcbe799f121d/semantic-search-openai-pinecone-query.png -------------------------------------------------------------------------------- /semantic-search-openai-pinecone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mharrvic/semantic-search-openai-pinecone/83d0f20ee9a88c91a2f18973e1d5fcbe799f121d/semantic-search-openai-pinecone.png -------------------------------------------------------------------------------- /src/components/Library.tsx: -------------------------------------------------------------------------------- 1 | import { trpc } from "../utils/trpc"; 2 | 3 | import type { SubmitHandler } from "react-hook-form"; 4 | import { useForm } from "react-hook-form"; 5 | import { LoadingResults } from "./Loading"; 6 | 7 | import toast from "react-hot-toast"; 8 | 9 | type Inputs = { 10 | title: string; 11 | text: string; 12 | }; 13 | 14 | const Library = () => { 15 | const { register, handleSubmit, reset } = useForm(); 16 | 17 | const { mutate, isLoading } = trpc.openAiPinecone.upsertEmbedding.useMutation( 18 | { 19 | onSuccess: () => { 20 | toast.remove(); 21 | toast.success("Saved!"); 22 | reset(); 23 | }, 24 | onError: () => { 25 | toast.error("Error!"); 26 | }, 27 | } 28 | ); 29 | 30 | const { 31 | data: libraryData, 32 | isLoading: libraryLoading, 33 | refetch, 34 | } = trpc.library.getMyLibrary.useQuery(); 35 | 36 | const onSubmit: SubmitHandler = async (data) => { 37 | toast.loading("Saving..."); 38 | mutate({ text: data.text, title: data.title }); 39 | refetch(); 40 | }; 41 | 42 | return ( 43 | <> 44 |
45 | 48 |
49 | 52 | 61 | 64 |