├── data-pipeline ├── ollama │ ├── docker-compose.yml │ ├── 3-semantic_search.py │ ├── 4-generative_search.py │ ├── 2-populate.py │ ├── 1-create_collection.py │ └── README.md ├── weaviate-embeddings-and-friendliai │ ├── 3-semantic_search.py │ ├── 4-generative_search.py │ ├── 2-populate.py │ └── 1-create_collection.py └── openai │ ├── 3-semantic_search.py │ ├── 4-generative_search.py │ ├── 2-populate.py │ ├── 1-create_collection.py │ └── README.md ├── .eslintrc.json ├── BookRecs.gif ├── .tutorial ├── video.json ├── 05-nextjs-app.md ├── 10-conclusion.md ├── 07-define-the-api.md ├── 08-recommendation-grid.md ├── 09-modal-view.md ├── 01-welcome.md ├── 04-basic-search-queries.md ├── 06-input-form.md ├── 03-loading-data.md └── 02-accounts-and-environment.md ├── public ├── favicon.ico ├── newsletter-subscribe.jpg └── replit.svg ├── blog-resources ├── article05.zip ├── postman-weaviate.zip ├── Search-With-Benefits.swiftpm │ ├── Assets.xcassets │ │ ├── Contents.json │ │ └── AppIcon.appiconset │ │ │ ├── AppIcon.png │ │ │ └── Contents.json │ ├── .swiftpm │ │ └── playgrounds │ │ │ ├── DocumentThumbnail.png │ │ │ ├── Workspace.plist │ │ │ ├── DocumentThumbnail.plist │ │ │ └── CachedManifest.plist │ ├── MyApp.swift │ ├── Package.swift │ ├── ContentView.swift │ └── SearchWithBenefitsViewModel.swift └── calcVolume.py ├── postcss.config.js ├── vercel.json ├── next.config.js ├── replit.nix ├── lib └── utils.ts ├── next-env.d.ts ├── .env.example ├── components.json ├── tsconfig.json ├── types.ts ├── pages ├── _app.tsx ├── api │ └── recommendations.ts └── index.tsx ├── components └── ui │ ├── input.tsx │ ├── tooltip.tsx │ ├── badge.tsx │ ├── popover.tsx │ ├── button.tsx │ ├── card.tsx │ └── dialog.tsx ├── package.json ├── LICENSE ├── .gitignore ├── styles ├── globals.css └── Home.module.css ├── tailwind.config.js ├── .replit ├── requirements.txt └── README.md /data-pipeline/ollama/docker-compose.yml: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /BookRecs.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/BookRecs/HEAD/BookRecs.gif -------------------------------------------------------------------------------- /.tutorial/video.json: -------------------------------------------------------------------------------- 1 | { "embedUrl": "https://www.youtube.com/embed/ASZVEe2WkI4" } 2 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/BookRecs/HEAD/public/favicon.ico -------------------------------------------------------------------------------- /blog-resources/article05.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/BookRecs/HEAD/blog-resources/article05.zip -------------------------------------------------------------------------------- /public/newsletter-subscribe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/BookRecs/HEAD/public/newsletter-subscribe.jpg -------------------------------------------------------------------------------- /blog-resources/postman-weaviate.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/BookRecs/HEAD/blog-resources/postman-weaviate.zip -------------------------------------------------------------------------------- /postcss.config.js: 
-------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 | -------------------------------------------------------------------------------- /vercel.json: -------------------------------------------------------------------------------- 1 | { 2 | "functions": { 3 | "pages/api/*.ts": { 4 | "maxDuration": 300 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = { 3 | reactStrictMode: true, 4 | } 5 | 6 | module.exports = nextConfig 7 | -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /replit.nix: -------------------------------------------------------------------------------- 1 | { pkgs }: { 2 | deps = [ 3 | pkgs.nodejs-18_x 4 | pkgs.nodePackages.typescript-language-server 5 | pkgs.nodePackages.yarn 6 | pkgs.replitPackages.jest 7 | ]; 8 | } -------------------------------------------------------------------------------- /lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { type ClassValue, clsx } from "clsx" 2 | import { twMerge } from "tailwind-merge" 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)) 6 | } 7 | -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/.swiftpm/playgrounds/DocumentThumbnail.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/BookRecs/HEAD/blog-resources/Search-With-Benefits.swiftpm/.swiftpm/playgrounds/DocumentThumbnail.png -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/Assets.xcassets/AppIcon.appiconset/AppIcon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/BookRecs/HEAD/blog-resources/Search-With-Benefits.swiftpm/Assets.xcassets/AppIcon.appiconset/AppIcon.png -------------------------------------------------------------------------------- /next-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /// 3 | 4 | // NOTE: This file should not be edited 5 | // see https://nextjs.org/docs/basic-features/typescript for more information. 6 | -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/MyApp.swift: -------------------------------------------------------------------------------- 1 | import SwiftUI 2 | 3 | @main 4 | struct MyApp: App { 5 | var body: some Scene { 6 | WindowGroup { 7 | ContentView() 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /blog-resources/calcVolume.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Sample calculation: Calculate the volume of a sphere with radius 5 4 | radius = 5 5 | volume = (4/3) * np.pi * (radius**3) 6 | 7 | # Print the volume with a label 8 | print("Volume of the sphere:", volume) -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | WEAVIATE_API_KEY=INSERT_WEAVIATE_API_OPTIONAL_FOR_THIS_PROJECT 2 
| WEAVIATE_CLUSTER_URL=INSERT_WEAVIATE_CLUSTER_URL_WITHOUT_HTTPS_OPTIONAL_FOR_THIS_PROJECT 3 | OPENAI_API_KEY=INSERT_OPENAI_API_KEY 4 | COHERE_API_KEY=INSERT_COHERE_API_KEY_HERE 5 | NEXT_PUBLIC_COHERE_CONFIGURED=INCLUDE_ONLY_IF_COHERE_IS_USED -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "filename" : "AppIcon.png", 5 | "idiom" : "universal", 6 | "platform" : "ios", 7 | "size" : "1024x1024" 8 | } 9 | ], 10 | "info" : { 11 | "author" : "xcode", 12 | "version" : 1 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "default", 4 | "rsc": false, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.js", 8 | "css": "styles/globals.css", 9 | "baseColor": "slate", 10 | "cssVariables": true 11 | }, 12 | "aliases": { 13 | "utils": "@/lib/utils", 14 | "components": "@/components" 15 | } 16 | } -------------------------------------------------------------------------------- /public/replit.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/.swiftpm/playgrounds/Workspace.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | AppSettings 6 | 7 | appIconPlaceholderGlyphName 8 | clock 9 | appSettingsVersion 10 | 1 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/.swiftpm/playgrounds/DocumentThumbnail.plist: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | DocumentThumbnailConfiguration 6 | 7 | accentColorHash 8 | 9 | MFe13OMFARMJ8HDqD5bDNSWxDg9LDdv8oq4TvGw4ZwM= 10 | 11 | appIconHash 12 | 13 | zgiDzGOmK0doOEL4mKqCc7ScHAx/T/IChBbB2SD0v60= 14 | 15 | thumbnailIsPrerendered 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /.tutorial/05-nextjs-app.md: -------------------------------------------------------------------------------- 1 | ## Setting Up the NextJS Application 2 | 3 | ### Project Creation and Configuration 4 | I used `create-next-app` to create the NextJS project and configured TailwindCSS from the start for styling. 5 | 6 | ### Application Interface 7 | The application interface is kept as simple as possible to limit complexity: 8 | 9 | 1. **Input Field**: To receive the prompt from the user. 10 | 2. **Grid View**: To show recommended books. 11 | 3. **Modal Overlay**: Acts as a single book view where we can read the description and book details. 
12 | 13 | --- 14 | *Note: The next section of the tutorial will include detailed steps on configuring and running the NextJS application.* 15 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "forceConsistentCasingInFileNames": true, 9 | "noEmit": true, 10 | "esModuleInterop": true, 11 | "module": "esnext", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "jsx": "preserve", 16 | "incremental": true, 17 | "paths": { 18 | "types": ["./types.ts"], 19 | "@/*": ["./*"] 20 | 21 | } 22 | }, 23 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"], 24 | "exclude": ["node_modules"] 25 | } 26 | -------------------------------------------------------------------------------- /types.ts: -------------------------------------------------------------------------------- 1 | export interface Book { 2 | isbn13: string; 3 | isbn10: string; 4 | title: string; 5 | subtitle: string; 6 | authors: string; 7 | categories: string; 8 | thumbnail: string; 9 | description: string; 10 | published_year: string; 11 | average_rating: string; 12 | num_pages: string; 13 | ratings_count: string; 14 | _additional: AdditionalType; 15 | } 16 | 17 | 18 | export interface NearTextType { 19 | concepts: [string] | []; 20 | certainty?: number; 21 | moveAwayFrom?: object; 22 | } 23 | 24 | export interface AdditionalType { 25 | generate: GenerateType 26 | } 27 | 28 | export interface GenerateType { 29 | error: string; 30 | singleResult: string; 31 | } -------------------------------------------------------------------------------- /pages/_app.tsx: -------------------------------------------------------------------------------- 1 | import '../styles/globals.css' 2 | 
import type { AppProps } from 'next/app' 3 | import Script from "next/script"; 4 | 5 | function MyApp({ Component, pageProps }: AppProps) { 6 | return ( 7 | <> 8 | 9 | 10 | 18 | 19 | 20 | 21 | ) 22 | 23 | } 24 | 25 | export default MyApp 26 | -------------------------------------------------------------------------------- /data-pipeline/ollama/3-semantic_search.py: -------------------------------------------------------------------------------- 1 | import weaviate 2 | from weaviate.classes.init import AdditionalConfig, Timeout 3 | 4 | client = weaviate.connect_to_local(additional_config=AdditionalConfig( 5 | timeout=Timeout(init=2, query=200, insert=120) # Values in seconds 6 | )) 7 | 8 | print(client.is_connected()) 9 | 10 | book_collection = client.collections.get(name="Book") 11 | 12 | # Semantic Search 13 | 14 | user_input = input("What query do you have for book recommendations? ") 15 | 16 | response = book_collection.query.near_text( 17 | query=user_input, 18 | limit=3 19 | ) 20 | 21 | print(f"Here are the recommended books for you based on your interest in {user_input}:") 22 | for book in response.objects: 23 | print(f"Book Title: {book.properties['title']}") 24 | print(f"Book Description: {book.properties['description']}") 25 | print('---\n\n\n') 26 | 27 | client.close() -------------------------------------------------------------------------------- /components/ui/input.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | import { cn } from "@/lib/utils" 4 | 5 | export interface InputProps 6 | extends React.InputHTMLAttributes {} 7 | 8 | const Input = React.forwardRef( 9 | ({ className, type, ...props }, ref) => { 10 | return ( 11 | 20 | ) 21 | } 22 | ) 23 | Input.displayName = "Input" 24 | 25 | export { Input } 26 | -------------------------------------------------------------------------------- /.tutorial/10-conclusion.md: 
-------------------------------------------------------------------------------- 1 | ## Conclusion 2 | 3 | That's all there really is to using Weaviate to create a simple recommendation system in a NextJS application. 4 | 5 | ### Demo 6 | Let's put in a simple query like "I want to learn about basketball" and see what we get. This is awesome! We have several books related to Basketball, as expected, and when we click 'Learn More', we get the modal view for our book. 7 | 8 | ### Recap 9 | With just the logic you've seen in this video, we can create a fairly simple, yet robust recommendation system using Weaviate's vector database. 10 | 11 | ## Thank You! 12 | Thanks for watching this video, we hope you found it informative! Happy building with Weaviate, and if you have any questions, please share with us on our Community Slack, details in the description below! See you next time! 13 | 14 | --- 15 | 16 | *Note: Feel free to customize the application and add more features as per your requirement.* 17 | 18 | -------------------------------------------------------------------------------- /data-pipeline/weaviate-embeddings-and-friendliai/3-semantic_search.py: -------------------------------------------------------------------------------- 1 | import os 2 | import weaviate 3 | from weaviate.classes.init import Auth 4 | from weaviate.classes.init import AdditionalConfig, Timeout 5 | 6 | WEAVIATE_CLUSTER_URL = os.getenv('WEAVIATE_CLUSTER_URL') or 'https://zxzyqcyksbw7ozpm5yowa.c0.us-west2.gcp.weaviate.cloud' 7 | WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY') or 'n6mdfI32xrXF3DH76i8Pwc2IajzLZop2igb6' 8 | 9 | 10 | client = weaviate.connect_to_weaviate_cloud( 11 | cluster_url=WEAVIATE_CLUSTER_URL, 12 | auth_credentials=Auth.api_key(WEAVIATE_API_KEY), 13 | ) 14 | 15 | print(client.is_connected()) 16 | 17 | book_collection = client.collections.get(name="WeaviateEmbeddingBooks") 18 | 19 | # Semantic Search 20 | 21 | response = book_collection.query.near_text( 22 | 
query="biology", 23 | limit=3 24 | ) 25 | 26 | print() 27 | for book in response.objects: 28 | print(book.properties['title']) 29 | print(book.properties['description']) 30 | print(book.properties['categories']) 31 | print('---') 32 | -------------------------------------------------------------------------------- /data-pipeline/openai/3-semantic_search.py: -------------------------------------------------------------------------------- 1 | import os 2 | import weaviate 3 | from weaviate.classes.init import AdditionalConfig, Timeout 4 | 5 | WEAVIATE_CLUSTER_URL = os.getenv('WEAVIATE_CLUSTER_URL') or 'https://zxzyqcyksbw7ozpm5yowa.c0.us-west2.gcp.weaviate.cloud' 6 | WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY') or 'n6mdfI32xrXF3DH76i8Pwc2IajzLZop2igb6' 7 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') 8 | 9 | client = weaviate.Client( 10 | url=WEAVIATE_CLUSTER_URL, 11 | auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY), 12 | additional_headers={"X-OpenAI-Api-Key": OPENAI_API_KEY}) 13 | 14 | print(client.is_connected()) 15 | 16 | book_collection = client.collections.get(name="Book") 17 | 18 | # Semantic Search 19 | 20 | response = book_collection.query.near_text( 21 | query="biology", 22 | limit=3 23 | ) 24 | 25 | print() 26 | for book in response.objects: 27 | print(book.properties['title']) 28 | print(book.properties['description']) 29 | print(book.properties['categories']) 30 | print('---') 31 | -------------------------------------------------------------------------------- /.tutorial/07-define-the-api.md: -------------------------------------------------------------------------------- 1 | ## Setting Up the API Endpoint 2 | 3 | In this section, we will take a look at how we set up the API endpoint on our NextJS app that queries Weaviate on our dataset. 4 | 5 | ### API Directory Structure 6 | In the `./pages` folder, there should be a directory called `api/`. 
Within it is a `recommendations.ts` file that will be triggered when an HTTP request is sent to `/api/recommendations/`. 7 | 8 | ### API Endpoint 9 | The `recommendations.ts` endpoint performs the following actions: 10 | 11 | 1. **Extract Query**: It will extract the `query` from the request body. 12 | 13 | 2. **Query Weaviate**: It then passes the `query` into the `WeaviateClient` as a `nearText` object and queries the Book vectors through GraphQL. 14 | 15 | 3. **Send Result**: The result is then sent back to the client, which we have already set up to be stored in the state of the NextJS application. 16 | 17 | --- 18 | *Note: The next section of the tutorial will include detailed steps on running the NextJS application and interacting with the user interface.* 19 | -------------------------------------------------------------------------------- /data-pipeline/ollama/4-generative_search.py: -------------------------------------------------------------------------------- 1 | import weaviate 2 | from weaviate.classes.init import AdditionalConfig, Timeout 3 | 4 | client = weaviate.connect_to_local(additional_config=AdditionalConfig( 5 | timeout=Timeout(init=2, query=200, insert=120) # Values in seconds 6 | )) 7 | 8 | print(client.is_connected()) 9 | 10 | book_collection = client.collections.get(name="Book") 11 | 12 | # Generative Search 13 | 14 | user_input = input("What query do you have for book recommendations? ") 15 | 16 | 17 | response = book_collection.generate.near_text( 18 | query=user_input, 19 | limit=2, 20 | single_prompt="Explain why this book might be interesting to read. The book's title is {title}, with a description: {description}, and is in the genre: {categories}." 
21 | ) 22 | 23 | 24 | print(f"Here are the recommended books for you based on your interest in {user_input}:") 25 | for book in response.objects: 26 | print(f"Book Title: {book.properties['title']}") 27 | print(f"Book Description: {book.properties['description']}") 28 | print('---\n\n\n') 29 | 30 | client.close() 31 | -------------------------------------------------------------------------------- /components/ui/tooltip.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import * as TooltipPrimitive from "@radix-ui/react-tooltip" 3 | 4 | import { cn } from "@/lib/utils" 5 | 6 | const TooltipProvider = TooltipPrimitive.Provider 7 | 8 | const Tooltip = TooltipPrimitive.Root 9 | 10 | const TooltipTrigger = TooltipPrimitive.Trigger 11 | 12 | const TooltipContent = React.forwardRef< 13 | React.ElementRef, 14 | React.ComponentPropsWithoutRef 15 | >(({ className, sideOffset = 4, ...props }, ref) => ( 16 | 25 | )) 26 | TooltipContent.displayName = TooltipPrimitive.Content.displayName 27 | 28 | export { Tooltip, TooltipTrigger, TooltipContent, TooltipProvider } 29 | -------------------------------------------------------------------------------- /data-pipeline/openai/4-generative_search.py: -------------------------------------------------------------------------------- 1 | import os 2 | import weaviate 3 | from weaviate.classes.init import AdditionalConfig, Timeout 4 | 5 | from dotenv import load_dotenv 6 | 7 | load_dotenv() 8 | 9 | WEAVIATE_CLUSTER_URL = os.getenv('WEAVIATE_CLUSTER_URL') or 'https://zxzyqcyksbw7ozpm5yowa.c0.us-west2.gcp.weaviate.cloud' 10 | WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY') or 'n6mdfI32xrXF3DH76i8Pwc2IajzLZop2igb6' 11 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') 12 | 13 | client = weaviate.Client( 14 | url=WEAVIATE_CLUSTER_URL, 15 | auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY), 16 | additional_headers={"X-OpenAI-Api-Key": OPENAI_API_KEY}) 17 | 18 
| print(client.is_connected()) 19 | 20 | book_collection = client.collections.get(name="Book") 21 | 22 | # Generative Search 23 | 24 | response = book_collection.generate.near_text( 25 | query="technology, data structures and algorithms, distributed systems", 26 | limit=2, 27 | single_prompt="Explain why this book might be interesting to someone who likes playing the violin, rock climbing, and doing yoga. the book's title is {title}, with a description: {description}, and is in the genre: {categories}." 28 | ) 29 | 30 | 31 | print(response.objects[0].generated) # Inspect the first object 32 | -------------------------------------------------------------------------------- /components/ui/badge.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import { cva, type VariantProps } from "class-variance-authority" 3 | 4 | import { cn } from "@/lib/utils" 5 | 6 | const badgeVariants = cva( 7 | "inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2", 8 | { 9 | variants: { 10 | variant: { 11 | default: 12 | "border-transparent bg-primary text-primary-foreground hover:bg-primary/80", 13 | secondary: 14 | "border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80", 15 | destructive: 16 | "border-transparent bg-destructive text-destructive-foreground hover:bg-destructive/80", 17 | outline: "text-foreground", 18 | }, 19 | }, 20 | defaultVariants: { 21 | variant: "default", 22 | }, 23 | } 24 | ) 25 | 26 | export interface BadgeProps 27 | extends React.HTMLAttributes, 28 | VariantProps {} 29 | 30 | function Badge({ className, variant, ...props }: BadgeProps) { 31 | return ( 32 |
33 | ) 34 | } 35 | 36 | export { Badge, badgeVariants } 37 | -------------------------------------------------------------------------------- /.tutorial/08-recommendation-grid.md: -------------------------------------------------------------------------------- 1 | ## Displaying Recommendations 2 | 3 | Once the semantic search is performed and the results are obtained, the recommended books are displayed on the application interface. The recommendation grid appears with a list of books that come from the semantic search result from our `nearText` query. These are stored in `recommendedBooks` after a query is made. 4 | 5 | ### Rendering Recommendations 6 | In the JSX of `index`, we map through `recommendedBooks` which returns the relevant `divs` that represent the recommendations grid. The grid is styled in a `flex-wrap` `div` so that they expand into the parent container and wrap around when a row is filled. 7 | 8 | ### Displaying Book Details 9 | As we map through those recommendations, we render book details to the screen such as the book thumbnail, the book title, and a button to learn more about the book. 10 | 11 | ### Rendering Modal 12 | When the 'Learn More' button is tapped, a modal is rendered on the screen. There is some logic that will select the book and put it into the state for the modal to display, and then some additional state to force the modal to come on the screen. 13 | 14 | --- 15 | *Note: The next section of the tutorial will include detailed steps on closing the modal and wrapping up the application.* 16 | -------------------------------------------------------------------------------- /.tutorial/09-modal-view.md: -------------------------------------------------------------------------------- 1 | ## Modal Mechanics 2 | 3 | In this section, we will briefly look at how the modal is triggered and what it includes. 
4 | 5 | ### Opening the Modal 6 | When the 'Learn More' button is clicked, the `openModal` function is triggered and receives a string that we can use to find the selected book. Once we've found it, we can set a state value for the book that was selected. 7 | 8 | ### Managing Modal Viewability 9 | We'll also have a state variable, `modalIsOpen`, to manage the visibility of the modal. When this state is true, we render the modal to the screen; that render logic is handled in the return portion of the component. 10 | 11 | ### Modal Content 12 | This modal includes a thumbnail of the book, the author details, genre, average rating, published year, and description. These are rendered from the `selectedBook` state variable. Since we also have an International Standard Book Number (ISBN) for this book, we can make a naive query against Amazon to show that book in an Amazon search result when clicked. This is mostly just for fun. 13 | 14 | ### Closing the Modal 15 | Lastly, we'll include a `closeModal` button that will close the modal from view. 
16 | 17 | --- 18 | *Note: The next section of the tutorial will include detailed steps on finalizing and testing the application.* 19 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "my-app", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --port 3000 --hostname 0.0.0.0", 7 | "build": "next build", 8 | "start": "next start --port 3000 --hostname 0.0.0.0", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@next/third-parties": "^14.1.3", 13 | "@radix-ui/react-dialog": "^1.0.4", 14 | "@radix-ui/react-popover": "^1.0.7", 15 | "@radix-ui/react-slot": "^1.0.2", 16 | "@radix-ui/react-tooltip": "^1.0.7", 17 | "class-variance-authority": "^0.7.0", 18 | "clsx": "^2.0.0", 19 | "lucide-react": "^0.274.0", 20 | "next": "^13.4.9", 21 | "react": "^18.2.0", 22 | "react-dom": "18.2.0", 23 | "react-spinners": "^0.13.8", 24 | "tailwind-merge": "^1.14.0", 25 | "tailwindcss-animate": "^1.0.7", 26 | "weaviate-ts-client": "^1.5.0" 27 | }, 28 | "devDependencies": { 29 | "@types/node": "^20.5.6", 30 | "@types/react": "^18.2.21", 31 | "@types/react-dom": "^18.2.6", 32 | "@types/react-modal": "^3.16.0", 33 | "autoprefixer": "^10.4.15", 34 | "eslint": "^8.44.0", 35 | "eslint-config-next": "^13.4.9", 36 | "postcss": "^8.4.28", 37 | "react-modal": "^3.16.1", 38 | "tailwindcss": "^3.3.3", 39 | "typescript": "^5.1.6" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /components/ui/popover.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import * as PopoverPrimitive from "@radix-ui/react-popover" 3 | 4 | import { cn } from "@/lib/utils" 5 | 6 | const Popover = PopoverPrimitive.Root 7 | 8 | const PopoverTrigger = PopoverPrimitive.Trigger 9 | 10 | const PopoverContent = 
React.forwardRef< 11 | React.ElementRef, 12 | React.ComponentPropsWithoutRef 13 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => ( 14 | 15 | 25 | 26 | )) 27 | PopoverContent.displayName = PopoverPrimitive.Content.displayName 28 | 29 | export { Popover, PopoverTrigger, PopoverContent } 30 | -------------------------------------------------------------------------------- /data-pipeline/weaviate-embeddings-and-friendliai/4-generative_search.py: -------------------------------------------------------------------------------- 1 | import os 2 | import weaviate 3 | from weaviate.classes.init import Auth 4 | from weaviate.classes.init import AdditionalConfig, Timeout 5 | 6 | from dotenv import load_dotenv 7 | 8 | load_dotenv() 9 | 10 | WEAVIATE_CLUSTER_URL = os.getenv('WEAVIATE_CLUSTER_URL') or 'https://zxzyqcyksbw7ozpm5yowa.c0.us-west2.gcp.weaviate.cloud' 11 | WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY') or 'n6mdfI32xrXF3DH76i8Pwc2IajzLZop2igb6' 12 | FRIENDLIAI_API_KEY = os.getenv('FRIENDLIAI_API_KEY') 13 | 14 | client = weaviate.connect_to_weaviate_cloud( 15 | cluster_url=WEAVIATE_CLUSTER_URL, 16 | auth_credentials=Auth.api_key(WEAVIATE_API_KEY), 17 | additional_headers={"X-Friendli-Api-Key": FRIENDLIAI_API_KEY}) 18 | 19 | print(client.is_connected()) 20 | 21 | book_collection = client.collections.get(name="Book") 22 | 23 | # Generative Search 24 | 25 | response = book_collection.generate.near_text( 26 | query="technology, data structures and algorithms, distributed systems", 27 | limit=2, 28 | single_prompt="Explain why this book might be interesting to someone who likes playing the violin, rock climbing, and doing yoga. the book's title is {title}, with a description: {description}, and is in the genre: {categories}." 
29 | ) 30 | 31 | 32 | print(response.objects[0].generated) # Inspect the first object 33 | -------------------------------------------------------------------------------- /.tutorial/01-welcome.md: -------------------------------------------------------------------------------- 1 | Build it With Weaviate 2 | ===== 3 | # Build a Recommendation System using Weaviate 4 | 5 | Welcome to this Build with Weaviate episode where we create a simple recommendation system. 6 | 7 | ## Introduction 8 | In this tutorial, we will build a simple recommendation system using Weaviate, a purpose-built vector database. Recommendation systems have become an essential component of many online applications and services in today's digital age. From personalized product recommendations to content suggestions, these systems help users discover new things and make informed decisions. 9 | 10 | ## Overview 11 | We will perform the following steps: 12 | 1. Create an instance of a Weaviate cluster to get our vector database. 13 | 2. Populate it with about 7000 books from a Kaggle dataset. 14 | 3. Connect to a Large Language Model hosted on OpenAI. 15 | 4. Build a NextJS application to do semantic search over our dataset. 16 | 17 | ## Let’s Try It Out! 18 | Before we dive into building, let’s see what we’re aiming for! Head on over to the deployed URL and type in something you want to learn. The books that are surfaced should contain relevant suggestions based on your query. 19 | 20 | ## Ready to Build this EPIC Project? 21 | If this sounds interesting to you, let’s roll up our sleeves and get started! 
:muscle: 22 | 23 | --- 24 | 25 | *Note: Detailed steps for each section of the project will be included in the actual tutorial.* 26 | 27 | -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version: 5.9 2 | 3 | // WARNING: 4 | // This file is automatically generated. 5 | // Do not edit it by hand because the contents will be replaced. 6 | 7 | import PackageDescription 8 | import AppleProductTypes 9 | 10 | let package = Package( 11 | name: "Search with Benefits", 12 | platforms: [ 13 | .iOS("17.4") 14 | ], 15 | products: [ 16 | .iOSApplication( 17 | name: "Search with Benefits", 18 | targets: ["AppModule"], 19 | displayVersion: "1.0", 20 | bundleVersion: "1", 21 | appIcon: .asset("AppIcon"), 22 | accentColor: .presetColor(.cyan), 23 | supportedDeviceFamilies: [ 24 | .pad, 25 | .phone 26 | ], 27 | supportedInterfaceOrientations: [ 28 | .portrait, 29 | .landscapeRight, 30 | .landscapeLeft, 31 | .portraitUpsideDown(.when(deviceFamilies: [.pad])) 32 | ], 33 | capabilities: [ 34 | .incomingNetworkConnections(), 35 | .outgoingNetworkConnections() 36 | ] 37 | ) 38 | ], 39 | targets: [ 40 | .executableTarget( 41 | name: "AppModule", 42 | path: ".", 43 | swiftSettings: [ 44 | .enableUpcomingFeature("BareSlashRegexLiterals") 45 | ] 46 | ) 47 | ] 48 | ) 49 | -------------------------------------------------------------------------------- /data-pipeline/ollama/2-populate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import weaviate 4 | import weaviate.classes as wvc 5 | from weaviate.classes.config import Configure, Property, DataType 6 | 7 | 8 | client = weaviate.connect_to_local() 9 | 10 | book_collection = client.collections.get(name="Book") 11 | 12 | f = open("./data-pipeline/7k-books-kaggle.csv", "r") 13 | 
current_book = None 14 | try: 15 | reader = csv.reader(f) 16 | # Iterate through each row of data 17 | for book in reader: 18 | current_book = book 19 | # 0 - isbn13 20 | # 1 - isbn10 21 | # 2 - title 22 | # 3 - subtitle 23 | # 4 - authors 24 | # 5 - categories 25 | # 6 - thumbnail 26 | # 7 - description 27 | # 8 - published_year 28 | # 9 - average_rating 29 | # 10 - num_pages 30 | # 11 - ratings_count 31 | 32 | properties = { 33 | "isbn13": book[0], 34 | "isbn10": book[1], 35 | "title": book[2], 36 | "subtitle": book[3], 37 | "authors": book[4], 38 | "categories": book[5], 39 | "thumbnail": book[6], 40 | "description": book[7], 41 | "published_year": book[8], 42 | "average_rating": book[9], 43 | "num_pages": book[10], 44 | "ratings_count": book[11], 45 | } 46 | 47 | uuid = book_collection.data.insert(properties) 48 | 49 | print(f"{book[2]}: {uuid}", end='\n') 50 | except Exception as e: 51 | print(f"Exception: {e}.") 52 | 53 | f.close() 54 | client.close() -------------------------------------------------------------------------------- /data-pipeline/ollama/1-create_collection.py: -------------------------------------------------------------------------------- 1 | import weaviate 2 | import weaviate.classes as wvc 3 | import weaviate.classes.config as wc 4 | 5 | client = weaviate.connect_to_local() 6 | client.collections.delete(name="Book") 7 | print(client.is_connected()) 8 | 9 | questions = client.collections.create( 10 | name="Book", 11 | 12 | vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_ollama(model="snowflake-arctic-embed:latest", api_endpoint="http://host.docker.internal:11434"), 13 | generative_config=wvc.config.Configure.Generative.ollama(api_endpoint="http://host.docker.internal:11434", model="llama3:latest"), 14 | properties=[ 15 | wc.Property(name="title", data_type=wc.DataType.TEXT), 16 | wc.Property(name="isbn10", data_type=wc.DataType.TEXT, skip_vectorization=True), 17 | wc.Property(name="isbn13", data_type=wc.DataType.TEXT, 
skip_vectorization=True), 18 | wc.Property(name="categories", data_type=wc.DataType.TEXT), 19 | wc.Property(name="thumbnail", data_type=wc.DataType.TEXT, skip_vectorization=True), 20 | wc.Property(name="description", data_type=wc.DataType.TEXT), 21 | wc.Property(name="num_pages", data_type=wc.DataType.TEXT, skip_vectorization=True), 22 | wc.Property(name="average_rating", data_type=wc.DataType.TEXT, skip_vectorization=True), 23 | wc.Property(name="published_year", data_type=wc.DataType.TEXT, skip_vectorization=True), 24 | wc.Property(name="authors", data_type=wc.DataType.TEXT, skip_vectorization=True), 25 | ], 26 | ) 27 | 28 | client.close() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020-2023, Weaviate B.V. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /.tutorial/04-basic-search-queries.md: -------------------------------------------------------------------------------- 1 | ## Running a Semantic Search 2 | 3 | ### Semantic Search 4 | With data in the vector database, we can run a simple semantic search over our embeddings in Weaviate and understand how the data is sent back to us as a result of the query. 5 | 6 | There is a script called `search.py` in the `data-pipeline` directory, which demonstrates a semantic search query. 7 | 8 | 1. We create a Weaviate client object. 9 | 2. We create an object called `nearText` that lists several “concepts” that we want to search against in the vector database. The search will look for semantically related concepts, not exact matches. 10 | 3. The query is executed against the Weaviate client object, and the results are printed to the screen. 11 | 12 | Feel free to change some of the concepts in `nearText` and run the script several times. 13 | 14 | ### Running the Search Script 15 | Running the search script is as simple as running `populate.py`. 16 | 17 | 1. In your shell, run `python ./data-pipeline/search.py`. 18 | 19 | The results should include a large object sent back with a list of books deeply nested in the response. These book results should be semantically similar to the `nearText` concepts passed into the query. 
20 | 21 | ### Next Steps 22 | Now that we know how to interact with Weaviate to find semantically similar items, we will see how the NextJS application uses this to surface recommendations to the user in a web interface. 23 | 24 | --- 25 | *Note: The next section of the tutorial will include detailed steps on using the NextJS application to surface recommendations.* 26 | 27 | -------------------------------------------------------------------------------- /.tutorial/06-input-form.md: -------------------------------------------------------------------------------- 1 | ## Application Interface Breakdown 2 | 3 | ### JSX Structure 4 | The JSX for the application contains a form element, which includes a submission button and an input element with various attributes. 5 | 6 | 1. **Input Field**: This is the most interesting part as it receives the user's input. We bind the input's `value` attribute to `query`. This `query` variable is a piece of React state declared at the top of `pages/index.tsx`. When the text in the input field changes, the `setQuery` function is called, and the `query` state is updated. This is standard React functionality. If this is new to you, consider taking a course in React / NextJS for a better understanding. 7 | 8 | 2. **Styling**: There are also some `className` attributes that style the input field using TailwindCSS utility classes. 9 | 10 | 3. **Form Submission**: The form element has an `onSubmit` handler that calls a function called `getRecommendations` when the form is submitted (for example, by clicking the submission button). 11 | 12 | ### getRecommendations Function 13 | Let's take a look at the `getRecommendations` function: 14 | 15 | 1. **Validation**: It starts with some lightweight validation to ensure the user has actually typed something into the input field. 16 | 17 | 2. **API Request**: Then we trigger a fetch call against `/api/recommendations`. 18 | 19 | 3. **Extracting Book Data**: Once we receive a response, we extract the book data from the payload. 
As seen in the Python search example, we know we can get the book data from `recommendations.data.Get.Book`, and we store it in a state variable. 20 | 21 | With this, we have the book recommendations data in our NextJS client application. 22 | 23 | --- 24 | *Note: The next section of the tutorial will include detailed steps on running the NextJS application and interacting with the interface.* 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | MANIFEST 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | .pytest_cache/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | db.sqlite3 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # Environments 84 | .env 85 | .venv 86 | env/ 87 | venv/ 88 | ENV/ 89 | env.bak/ 90 | venv.bak/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | 105 | .next 106 | .vercel 107 | node_modules -------------------------------------------------------------------------------- /data-pipeline/weaviate-embeddings-and-friendliai/2-populate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import weaviate 4 | from weaviate.classes.init import Auth 5 | 6 | 7 | WEAVIATE_CLUSTER_URL = os.getenv('WEAVIATE_CLUSTER_URL') or 'https://zxzyqcyksbw7ozpm5yowa.c0.us-west2.gcp.weaviate.cloud' 8 | WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY') or 'n6mdfI32xrXF3DH76i8Pwc2IajzLZop2igb6' 9 | 10 | client = weaviate.connect_to_weaviate_cloud( 11 | cluster_url=WEAVIATE_CLUSTER_URL, 12 | auth_credentials=Auth.api_key(WEAVIATE_API_KEY), 13 | ) 14 | 15 | book_collection = client.collections.get(name="WeaviateEmbeddingBooks") 16 | 17 | f = 
open("../7k-books-kaggle.csv", "r") 18 | current_book = None 19 | try: 20 | reader = csv.reader(f) 21 | # Iterate through each row of data 22 | for book in reader: 23 | current_book = book 24 | # 0 - isbn13 25 | # 1 - isbn10 26 | # 2 - title 27 | # 3 - subtitle 28 | # 4 - authors 29 | # 5 - categories 30 | # 6 - thumbnail 31 | # 7 - description 32 | # 8 - published_year 33 | # 9 - average_rating 34 | # 10 - num_pages 35 | # 11 - ratings_count 36 | 37 | properties = { 38 | "isbn13": book[0], 39 | "isbn10": book[1], 40 | "title": book[2], 41 | "subtitle": book[3], 42 | "authors": book[4], 43 | "categories": book[5], 44 | "thumbnail": book[6], 45 | "description": book[7], 46 | "published_year": book[8], 47 | "average_rating": book[9], 48 | "num_pages": book[10], 49 | "ratings_count": book[11], 50 | } 51 | 52 | uuid = book_collection.data.insert(properties) 53 | 54 | print(f"{book[2]}: {uuid}", end='\n') 55 | except Exception as e: 56 | print(f"Exception: {e}.") 57 | 58 | f.close() 59 | client.close() -------------------------------------------------------------------------------- /data-pipeline/openai/2-populate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import weaviate 4 | import weaviate.classes as wvc 5 | 6 | WEAVIATE_CLUSTER_URL = os.getenv('WEAVIATE_CLUSTER_URL') or 'https://zxzyqcyksbw7ozpm5yowa.c0.us-west2.gcp.weaviate.cloud' 7 | WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY') or 'n6mdfI32xrXF3DH76i8Pwc2IajzLZop2igb6' 8 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') 9 | 10 | client = weaviate.Client( 11 | url=WEAVIATE_CLUSTER_URL, 12 | auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY), 13 | additional_headers={"X-OpenAI-Api-Key": OPENAI_API_KEY}) 14 | 15 | book_collection = client.collections.get(name="Book") 16 | 17 | f = open("./data-pipeline/7k-books-kaggle.csv", "r") 18 | current_book = None 19 | try: 20 | reader = csv.reader(f) 21 | # Iterate through each row of data 
22 | for book in reader: 23 | current_book = book 24 | # 0 - isbn13 25 | # 1 - isbn10 26 | # 2 - title 27 | # 3 - subtitle 28 | # 4 - authors 29 | # 5 - categories 30 | # 6 - thumbnail 31 | # 7 - description 32 | # 8 - published_year 33 | # 9 - average_rating 34 | # 10 - num_pages 35 | # 11 - ratings_count 36 | 37 | properties = { 38 | "isbn13": book[0], 39 | "isbn10": book[1], 40 | "title": book[2], 41 | "subtitle": book[3], 42 | "authors": book[4], 43 | "categories": book[5], 44 | "thumbnail": book[6], 45 | "description": book[7], 46 | "published_year": book[8], 47 | "average_rating": book[9], 48 | "num_pages": book[10], 49 | "ratings_count": book[11], 50 | } 51 | 52 | uuid = book_collection.data.insert(properties) 53 | 54 | print(f"{book[2]}: {uuid}", end='\n') 55 | except Exception as e: 56 | print(f"Exception: {e}.") 57 | 58 | f.close() 59 | client.close() -------------------------------------------------------------------------------- /data-pipeline/openai/1-create_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | import weaviate 3 | import weaviate.classes as wvc 4 | import weaviate.classes.config as wc 5 | 6 | WEAVIATE_CLUSTER_URL = os.getenv('WEAVIATE_CLUSTER_URL') or 'https://zxzyqcyksbw7ozpm5yowa.c0.us-west2.gcp.weaviate.cloud' 7 | WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY') or 'n6mdfI32xrXF3DH76i8Pwc2IajzLZop2igb6' 8 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') 9 | 10 | client = weaviate.Client( 11 | url=WEAVIATE_CLUSTER_URL, 12 | auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY), 13 | additional_headers={"X-OpenAI-Api-Key": OPENAI_API_KEY}) 14 | 15 | client.collections.delete(name="Book") 16 | print(client.is_connected()) 17 | 18 | questions = client.collections.create( 19 | name="Book", 20 | 21 | vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_openai(model="text-embedding-3-small"), 22 | 
generative_config=wvc.config.Configure.Generative.openai(model="gpt-3.5-turbo"), 23 | properties=[ 24 | wc.Property(name="title", data_type=wc.DataType.TEXT), 25 | wc.Property(name="isbn10", data_type=wc.DataType.TEXT, skip_vectorization=True), 26 | wc.Property(name="isbn13", data_type=wc.DataType.TEXT, skip_vectorization=True), 27 | wc.Property(name="categories", data_type=wc.DataType.TEXT), 28 | wc.Property(name="thumbnail", data_type=wc.DataType.TEXT, skip_vectorization=True), 29 | wc.Property(name="description", data_type=wc.DataType.TEXT), 30 | wc.Property(name="num_pages", data_type=wc.DataType.TEXT, skip_vectorization=True), 31 | wc.Property(name="average_rating", data_type=wc.DataType.TEXT, skip_vectorization=True), 32 | wc.Property(name="published_year", data_type=wc.DataType.TEXT, skip_vectorization=True), 33 | wc.Property(name="authors", data_type=wc.DataType.TEXT, skip_vectorization=True), 34 | ], 35 | ) 36 | 37 | client.close() -------------------------------------------------------------------------------- /styles/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | @layer base { 6 | :root { 7 | --background: 0 0% 100%; 8 | --foreground: 222.2 84% 4.9%; 9 | 10 | --card: 0 0% 100%; 11 | --card-foreground: 222.2 84% 4.9%; 12 | 13 | --popover: 0 0% 100%; 14 | --popover-foreground: 222.2 84% 4.9%; 15 | 16 | --primary: 222.2 47.4% 11.2%; 17 | --primary-foreground: 210 40% 98%; 18 | 19 | --secondary: 210 40% 96.1%; 20 | --secondary-foreground: 222.2 47.4% 11.2%; 21 | 22 | --muted: 210 40% 96.1%; 23 | --muted-foreground: 215.4 16.3% 46.9%; 24 | 25 | --accent: 210 40% 96.1%; 26 | --accent-foreground: 222.2 47.4% 11.2%; 27 | 28 | --destructive: 0 84.2% 60.2%; 29 | --destructive-foreground: 210 40% 98%; 30 | 31 | --border: 214.3 31.8% 91.4%; 32 | --input: 214.3 31.8% 91.4%; 33 | --ring: 222.2 84% 4.9%; 34 | 35 | --radius: 0.5rem; 36 | } 37 | 
38 | .dark { 39 | --background: 222.2 84% 4.9%; 40 | --foreground: 210 40% 98%; 41 | 42 | --card: 222.2 84% 4.9%; 43 | --card-foreground: 210 40% 98%; 44 | 45 | --popover: 222.2 84% 4.9%; 46 | --popover-foreground: 210 40% 98%; 47 | 48 | --primary: 210 40% 98%; 49 | --primary-foreground: 222.2 47.4% 11.2%; 50 | 51 | --secondary: 217.2 32.6% 17.5%; 52 | --secondary-foreground: 210 40% 98%; 53 | 54 | --muted: 217.2 32.6% 17.5%; 55 | --muted-foreground: 215 20.2% 65.1%; 56 | 57 | --accent: 217.2 32.6% 17.5%; 58 | --accent-foreground: 210 40% 98%; 59 | 60 | --destructive: 0 62.8% 30.6%; 61 | --destructive-foreground: 210 40% 98%; 62 | 63 | --border: 217.2 32.6% 17.5%; 64 | --input: 217.2 32.6% 17.5%; 65 | --ring: 212.7 26.8% 83.9%; 66 | } 67 | } 68 | 69 | @layer base { 70 | * { 71 | @apply border-border; 72 | } 73 | body { 74 | @apply bg-background text-foreground; 75 | } 76 | } -------------------------------------------------------------------------------- /data-pipeline/weaviate-embeddings-and-friendliai/1-create_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | import weaviate 3 | from weaviate.classes.init import Auth 4 | import weaviate.classes as wvc 5 | import weaviate.classes.config as wc 6 | 7 | WEAVIATE_CLUSTER_URL = os.getenv('WEAVIATE_CLUSTER_URL') or 'REPLACE_WITH_YOUR_CLUSTER_URL_IF_NOT_USING_ENVVAR' 8 | WEAVIATE_API_KEY = os.getenv('WEAVIATE_API_KEY') or 'REPLACE_WITH_YOUR_KEY_IF_NOT_USING_ENVVAR' 9 | 10 | 11 | client = weaviate.connect_to_weaviate_cloud( 12 | cluster_url=WEAVIATE_CLUSTER_URL, 13 | auth_credentials=Auth.api_key(WEAVIATE_API_KEY), 14 | ) 15 | 16 | client.collections.delete(name="WeaviateEmbeddingBooks") 17 | print(client.is_connected()) 18 | 19 | questions = client.collections.create( 20 | name="WeaviateEmbeddingBooks", 21 | 22 | vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_weaviate(model="Snowflake/snowflake-arctic-embed-l-v2.0"), 23 | 
generative_config=wvc.config.Configure.Generative.friendliai(model="meta-llama-3.3-70b-instruct"), 24 | properties=[ 25 | wc.Property(name="title", data_type=wc.DataType.TEXT), 26 | wc.Property(name="isbn10", data_type=wc.DataType.TEXT, skip_vectorization=True), 27 | wc.Property(name="isbn13", data_type=wc.DataType.TEXT, skip_vectorization=True), 28 | wc.Property(name="categories", data_type=wc.DataType.TEXT), 29 | wc.Property(name="thumbnail", data_type=wc.DataType.TEXT, skip_vectorization=True), 30 | wc.Property(name="description", data_type=wc.DataType.TEXT), 31 | wc.Property(name="num_pages", data_type=wc.DataType.TEXT, skip_vectorization=True), 32 | wc.Property(name="average_rating", data_type=wc.DataType.TEXT, skip_vectorization=True), 33 | wc.Property(name="published_year", data_type=wc.DataType.TEXT, skip_vectorization=True), 34 | wc.Property(name="authors", data_type=wc.DataType.TEXT, skip_vectorization=True), 35 | ], 36 | ) 37 | 38 | client.close() -------------------------------------------------------------------------------- /.tutorial/03-loading-data.md: -------------------------------------------------------------------------------- 1 | ## Populating the Vector Database 2 | 3 | ### Dataset 4 | We will be using a dataset from Kaggle, which includes about 7000 books with details like ISBN number, book description, book cover link, and more. You can find more information about the dataset on Kaggle (link in the description below). 5 | 6 | In our repository, there is a folder called `data-pipeline` that contains two scripts and a dataset. The data is stored in a file called “7k-books-Kaggle.csv”. Feel free to look through it for more details. 7 | 8 | ### Scripts 9 | One of the scripts, `populate.py`, is responsible for creating vectors and storing them in Weaviate. Let's look at `populate.py` in greater detail: 10 | 11 | 1. We create a weaviate-client object that receives the OpenAI API Key and the Weaviate key. 12 | 2. 
If the script was run before, we delete any pre-existing collection called 'Book' to keep the database “fresh” (Note: This is not recommended in production). 13 | 3. We create a schema for our books. The schema contains configuration details for vectorizing the data. We use the “text2vec-openai” vectorizer, and in the module configuration, we use `text-embedding-3-small` as the embedding model to generate our embeddings. 14 | 4. We iterate through each row in our CSV dataset to create vectors for each book. The vectors and their related data objects are then stored in Weaviate. 15 | 16 | ### Running the Script 17 | Now that you understand how it works, let’s run the script. First, we need to install some dependencies. 18 | 19 | 1. Set up a Python virtual environment for the dependencies. 20 | 2. Run `pip install weaviate-client` in the terminal to install the official Weaviate Python client. 21 | 3. From the repository root, run `python data-pipeline/openai/1-create_collection.py` to create the collection, then `python data-pipeline/openai/2-populate.py` to load the books (the steps described above are split across these two scripts). 22 | 23 | When it finishes, you'll have data in Weaviate, and we can begin doing some fun and interesting searches on it. 
24 | 25 | --- 26 | *Note: The next section of the tutorial will include detailed steps on doing searches on the data.* 27 | 28 | -------------------------------------------------------------------------------- /components/ui/button.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import { Slot } from "@radix-ui/react-slot" 3 | import { cva, type VariantProps } from "class-variance-authority" 4 | 5 | import { cn } from "@/lib/utils" 6 | 7 | const buttonVariants = cva( 8 | "inline-flex items-center justify-center rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50", 9 | { 10 | variants: { 11 | variant: { 12 | default: "bg-primary text-primary-foreground hover:bg-primary/90", 13 | destructive: 14 | "bg-destructive text-destructive-foreground hover:bg-destructive/90", 15 | outline: 16 | "border border-input bg-background hover:bg-accent hover:text-accent-foreground", 17 | secondary: 18 | "bg-secondary text-secondary-foreground hover:bg-secondary/80", 19 | ghost: "hover:bg-accent hover:text-accent-foreground", 20 | link: "text-primary underline-offset-4 hover:underline", 21 | }, 22 | size: { 23 | default: "h-10 px-4 py-2", 24 | sm: "h-9 rounded-md px-3", 25 | lg: "h-11 rounded-md px-8", 26 | icon: "h-10 w-10", 27 | }, 28 | }, 29 | defaultVariants: { 30 | variant: "default", 31 | size: "default", 32 | }, 33 | } 34 | ) 35 | 36 | export interface ButtonProps 37 | extends React.ButtonHTMLAttributes, 38 | VariantProps { 39 | asChild?: boolean 40 | } 41 | 42 | const Button = React.forwardRef( 43 | ({ className, variant, size, asChild = false, ...props }, ref) => { 44 | const Comp = asChild ? 
Slot : "button" 45 | return ( 46 | 51 | ) 52 | } 53 | ) 54 | Button.displayName = "Button" 55 | 56 | export { Button, buttonVariants } 57 | -------------------------------------------------------------------------------- /data-pipeline/openai/README.md: -------------------------------------------------------------------------------- 1 | # Book Recommendations 2 | 3 | ## Weaviate Cloud (WCD) and OpenAI 4 | 5 | The files contained in ./data-pipeline/openai demonstrate how to use a cluster in Weaviate Cloud (WCD) with OpenAI. 6 | 7 | First, you'll need to create a cluster in WCD at https://console.weaviate.cloud/. There are free sandbox tiers that allow you to create a cluster entirely for free. 8 | 9 | ## Environment Setup 10 | 11 | 1. You'll need to create an .env file with the following API keys: 12 | * WEAVIATE_CLUSTER_URL 13 | * WEAVIATE_API_KEY 14 | * OPENAI_API_KEY 15 | 16 | 2. Create a python virtual environment and install dependencies in requirements.txt at the root of this project 17 | ``` 18 | python3 -m venv venv 19 | source venv/bin/activate 20 | pip install -r requirements.txt 21 | ``` 22 | 23 | Then you can begin executing each step to understand how the BookRecs NextJS project interacts with Weaviate. 24 | 25 | ## Steps of Execution 26 | 27 | ```bash 28 | python 1-create_collection.py 29 | ``` 30 | 31 | * This file will create a client in your Weaviate Cloud instance that configures OpenAI for embeddings and inference. 32 | 33 | ```bash 34 | python 2-populate.py 35 | ``` 36 | 37 | * This script will populate your Weaviate Cloud data with data from the kaggle dataset referenced at the root directory of this project. 38 | 39 | ```bash 40 | python 3-semantic_search.py 41 | ``` 42 | 43 | * Once executed, the script will make a connection with your Weaviate Cluster. Once complete, you'll be prompted for user input regarding a query you'd like to apply in the semantic search. 
The script then uses the Weaviate Client to do the semantic search against the dataset. In doing so, Weaviate generates a vector embedding using the configured vectorizer, in this case OpenAI - `text-embedding-3-small`, and then uses the query embedding to find related vectors. 44 | 45 | ```bash 46 | python 4-generative_search.py 47 | ``` 48 | 49 | * Similar to 3-semantic_search.py, Weaviate applies an additional step where it passes the results over to the configured generative search module to do an inference based on the prompt in `4-generative_search.py`. -------------------------------------------------------------------------------- /components/ui/card.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | import { cn } from "@/lib/utils" 4 | 5 | const Card = React.forwardRef< 6 | HTMLDivElement, 7 | React.HTMLAttributes 8 | >(({ className, ...props }, ref) => ( 9 |
17 | )) 18 | Card.displayName = "Card" 19 | 20 | const CardHeader = React.forwardRef< 21 | HTMLDivElement, 22 | React.HTMLAttributes 23 | >(({ className, ...props }, ref) => ( 24 |
29 | )) 30 | CardHeader.displayName = "CardHeader" 31 | 32 | const CardTitle = React.forwardRef< 33 | HTMLParagraphElement, 34 | React.HTMLAttributes 35 | >(({ className, ...props }, ref) => ( 36 |

44 | )) 45 | CardTitle.displayName = "CardTitle" 46 | 47 | const CardDescription = React.forwardRef< 48 | HTMLParagraphElement, 49 | React.HTMLAttributes 50 | >(({ className, ...props }, ref) => ( 51 |

56 | )) 57 | CardDescription.displayName = "CardDescription" 58 | 59 | const CardContent = React.forwardRef< 60 | HTMLDivElement, 61 | React.HTMLAttributes 62 | >(({ className, ...props }, ref) => ( 63 |

64 | )) 65 | CardContent.displayName = "CardContent" 66 | 67 | const CardFooter = React.forwardRef< 68 | HTMLDivElement, 69 | React.HTMLAttributes 70 | >(({ className, ...props }, ref) => ( 71 |
76 | )) 77 | CardFooter.displayName = "CardFooter" 78 | 79 | export { Card, CardHeader, CardFooter, CardTitle, CardDescription, CardContent } 80 | -------------------------------------------------------------------------------- /styles/Home.module.css: -------------------------------------------------------------------------------- 1 | .container { 2 | padding: 0 2rem; 3 | } 4 | 5 | .main { 6 | min-height: 100vh; 7 | padding: 4rem 0; 8 | flex: 1; 9 | display: flex; 10 | flex-direction: column; 11 | justify-content: center; 12 | align-items: center; 13 | } 14 | 15 | .footer { 16 | display: flex; 17 | flex: 1; 18 | padding: 2rem 0; 19 | border-top: 1px solid #eaeaea; 20 | justify-content: center; 21 | align-items: center; 22 | } 23 | 24 | .footer a { 25 | display: flex; 26 | justify-content: center; 27 | align-items: center; 28 | flex-grow: 1; 29 | } 30 | 31 | .title a { 32 | color: #0070f3; 33 | text-decoration: none; 34 | } 35 | 36 | .title a:hover, 37 | .title a:focus, 38 | .title a:active { 39 | text-decoration: underline; 40 | } 41 | 42 | .title { 43 | margin: 0; 44 | line-height: 1.15; 45 | font-size: 4rem; 46 | } 47 | 48 | .title, 49 | .description { 50 | text-align: center; 51 | } 52 | 53 | .description { 54 | margin: 4rem 0; 55 | line-height: 1.5; 56 | font-size: 1.5rem; 57 | } 58 | 59 | .code { 60 | background: #fafafa; 61 | border-radius: 5px; 62 | padding: 0.75rem; 63 | font-size: 1.1rem; 64 | font-family: Menlo, Monaco, Lucida Console, Liberation Mono, DejaVu Sans Mono, 65 | Bitstream Vera Sans Mono, Courier New, monospace; 66 | } 67 | 68 | .grid { 69 | display: flex; 70 | align-items: center; 71 | justify-content: center; 72 | flex-wrap: wrap; 73 | max-width: 800px; 74 | } 75 | 76 | .card { 77 | margin: 1rem; 78 | padding: 1.5rem; 79 | text-align: left; 80 | color: inherit; 81 | text-decoration: none; 82 | border: 1px solid #eaeaea; 83 | border-radius: 10px; 84 | transition: color 0.15s ease, border-color 0.15s ease; 85 | max-width: 300px; 86 | } 87 | 
88 | .card:hover, 89 | .card:focus, 90 | .card:active { 91 | color: #0070f3; 92 | border-color: #0070f3; 93 | } 94 | 95 | .card h2 { 96 | margin: 0 0 1rem 0; 97 | font-size: 1.5rem; 98 | } 99 | 100 | .card p { 101 | margin: 0; 102 | font-size: 1.25rem; 103 | line-height: 1.5; 104 | } 105 | 106 | .logo { 107 | height: 1em; 108 | margin-left: 0.2rem; 109 | } 110 | 111 | @media (max-width: 600px) { 112 | .grid { 113 | width: 100%; 114 | flex-direction: column; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | darkMode: ["class"], 4 | content: [ 5 | './pages/**/*.{ts,tsx}', 6 | './components/**/*.{ts,tsx}', 7 | './app/**/*.{ts,tsx}', 8 | './src/**/*.{ts,tsx}', 9 | ], 10 | theme: { 11 | container: { 12 | center: true, 13 | padding: "2rem", 14 | screens: { 15 | "2xl": "1400px", 16 | }, 17 | }, 18 | extend: { 19 | colors: { 20 | border: "hsl(var(--border))", 21 | input: "hsl(var(--input))", 22 | ring: "hsl(var(--ring))", 23 | background: "hsl(var(--background))", 24 | foreground: "hsl(var(--foreground))", 25 | primary: { 26 | DEFAULT: "hsl(var(--primary))", 27 | foreground: "hsl(var(--primary-foreground))", 28 | }, 29 | secondary: { 30 | DEFAULT: "hsl(var(--secondary))", 31 | foreground: "hsl(var(--secondary-foreground))", 32 | }, 33 | destructive: { 34 | DEFAULT: "hsl(var(--destructive))", 35 | foreground: "hsl(var(--destructive-foreground))", 36 | }, 37 | muted: { 38 | DEFAULT: "hsl(var(--muted))", 39 | foreground: "hsl(var(--muted-foreground))", 40 | }, 41 | accent: { 42 | DEFAULT: "hsl(var(--accent))", 43 | foreground: "hsl(var(--accent-foreground))", 44 | }, 45 | popover: { 46 | DEFAULT: "hsl(var(--popover))", 47 | foreground: "hsl(var(--popover-foreground))", 48 | }, 49 | card: { 50 | DEFAULT: "hsl(var(--card))", 51 | foreground: 
"hsl(var(--card-foreground))", 52 | }, 53 | }, 54 | borderRadius: { 55 | lg: "var(--radius)", 56 | md: "calc(var(--radius) - 2px)", 57 | sm: "calc(var(--radius) - 4px)", 58 | }, 59 | keyframes: { 60 | "accordion-down": { 61 | from: { height: 0 }, 62 | to: { height: "var(--radix-accordion-content-height)" }, 63 | }, 64 | "accordion-up": { 65 | from: { height: "var(--radix-accordion-content-height)" }, 66 | to: { height: 0 }, 67 | }, 68 | }, 69 | animation: { 70 | "accordion-down": "accordion-down 0.2s ease-out", 71 | "accordion-up": "accordion-up 0.2s ease-out", 72 | }, 73 | }, 74 | }, 75 | plugins: [require("tailwindcss-animate")], 76 | } -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/.swiftpm/playgrounds/CachedManifest.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CachedManifest 6 | 7 | manifestData 8 | 9 | eyJkZXBlbmRlbmNpZXMiOltdLCJkaXNwbGF5TmFtZSI6IlNlYXJjaCB3aXRo 10 | IEJlbmVmaXRzIiwicGFja2FnZUtpbmQiOnsicm9vdCI6e319LCJwbGF0Zm9y 11 | bXMiOlt7Im9wdGlvbnMiOltdLCJwbGF0Zm9ybU5hbWUiOiJpb3MiLCJ2ZXJz 12 | aW9uIjoiMTcuNCJ9XSwicHJvZHVjdHMiOlt7Im5hbWUiOiJTZWFyY2ggd2l0 13 | aCBCZW5lZml0cyIsInNldHRpbmdzIjpbeyJkaXNwbGF5VmVyc2lvbiI6WyIx 14 | LjAiXX0seyJidW5kbGVWZXJzaW9uIjpbIjEiXX0seyJpT1NBcHBJbmZvIjpb 15 | eyJhY2NlbnRDb2xvciI6eyJwcmVzZXRDb2xvciI6eyJwcmVzZXRDb2xvciI6 16 | eyJyYXdWYWx1ZSI6ImN5YW4ifX19LCJhcHBJY29uIjp7ImFzc2V0Ijp7Im5h 17 | bWUiOiJBcHBJY29uIn19LCJjYXBhYmlsaXRpZXMiOlt7InB1cnBvc2UiOiJp 18 | bmNvbWluZ05ldHdvcmtDb25uZWN0aW9ucyJ9LHsicHVycG9zZSI6Im91dGdv 19 | aW5nTmV0d29ya0Nvbm5lY3Rpb25zIn1dLCJzdXBwb3J0ZWREZXZpY2VGYW1p 20 | bGllcyI6WyJwYWQiLCJwaG9uZSJdLCJzdXBwb3J0ZWRJbnRlcmZhY2VPcmll 21 | bnRhdGlvbnMiOlt7InBvcnRyYWl0Ijp7fX0seyJsYW5kc2NhcGVSaWdodCI6 22 | e319LHsibGFuZHNjYXBlTGVmdCI6e319LHsicG9ydHJhaXRVcHNpZGVEb3du 23 | Ijp7ImNvbmRpdGlvbiI6eyJkZXZpY2VGYW1pbGllcyI6WyJwYWQiXX19fV19 24 | 
XX1dLCJ0YXJnZXRzIjpbIkFwcE1vZHVsZSJdLCJ0eXBlIjp7ImV4ZWN1dGFi 25 | bGUiOm51bGx9fV0sInRhcmdldE1hcCI6eyJBcHBNb2R1bGUiOnsiZGVwZW5k 26 | ZW5jaWVzIjpbXSwiZXhjbHVkZSI6W10sIm5hbWUiOiJBcHBNb2R1bGUiLCJw 27 | YWNrYWdlQWNjZXNzIjp0cnVlLCJwYXRoIjoiLiIsInJlc291cmNlcyI6W10s 28 | InNldHRpbmdzIjpbeyJraW5kIjp7ImVuYWJsZVVwY29taW5nRmVhdHVyZSI6 29 | eyJfMCI6IkJhcmVTbGFzaFJlZ2V4TGl0ZXJhbHMifX0sInRvb2wiOiJzd2lm 30 | dCJ9XSwidHlwZSI6ImV4ZWN1dGFibGUifX0sInRhcmdldHMiOlt7ImRlcGVu 31 | ZGVuY2llcyI6W10sImV4Y2x1ZGUiOltdLCJuYW1lIjoiQXBwTW9kdWxlIiwi 32 | cGFja2FnZUFjY2VzcyI6dHJ1ZSwicGF0aCI6Ii4iLCJyZXNvdXJjZXMiOltd 33 | LCJzZXR0aW5ncyI6W3sia2luZCI6eyJlbmFibGVVcGNvbWluZ0ZlYXR1cmUi 34 | OnsiXzAiOiJCYXJlU2xhc2hSZWdleExpdGVyYWxzIn19LCJ0b29sIjoic3dp 35 | ZnQifV0sInR5cGUiOiJleGVjdXRhYmxlIn1dLCJ0b29sc1ZlcnNpb24iOnsi 36 | X3ZlcnNpb24iOiI1LjkuMCJ9fQ== 37 | 38 | manifestHash 39 | 40 | hIbPIDjF7brbVAxSESQAGb8fgjmm284rVuFFTJN6IM8= 41 | 42 | schemaVersion 43 | 4 44 | swiftPMVersionString 45 | 5.10.0-dev 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /.replit: -------------------------------------------------------------------------------- 1 | entrypoint = "index.js" 2 | 3 | hidden = [".config", ".next", ".swc"] 4 | modules = ["python-3.10:v18-20230807-322e88b"] 5 | 6 | run = "npm run dev" 7 | 8 | [[hints]] 9 | regex = "Error \\[ERR_REQUIRE_ESM\\]" 10 | message = "We see that you are using require(...) inside your code. We currently do not support this syntax. Please use 'import' instead when using external modules. 
(https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import)" 11 | 12 | [nix] 13 | channel = "stable-22_11" 14 | 15 | [env] 16 | XDG_CONFIG_HOME = "$REPL_HOME/.config" 17 | PATH = "$REPL_HOME/.config/npm/node_global/bin:$REPL_HOME/node_modules/.bin" 18 | npm_config_prefix = "$REPL_HOME/.config/npm/node_global" 19 | NEXT_TELEMETRY_DISABLED = "1" 20 | 21 | [packager] 22 | language = "nodejs" 23 | 24 | [packager.features] 25 | packageSearch = true 26 | guessImports = true 27 | enabledForHosting = false 28 | 29 | [unitTest] 30 | language = "nodejs" 31 | 32 | [languages.javascript] 33 | pattern = "**/{*.js,*.jsx,*.ts,*.tsx}" 34 | 35 | [languages.javascript.languageServer] 36 | start = [ "typescript-language-server", "--stdio" ] 37 | 38 | [debugger] 39 | support = true 40 | 41 | [debugger.interactive] 42 | transport = "localhost:0" 43 | startCommand = [ "dap-node" ] 44 | 45 | [debugger.interactive.initializeMessage] 46 | command = "initialize" 47 | type = "request" 48 | 49 | [debugger.interactive.initializeMessage.arguments] 50 | clientID = "replit" 51 | clientName = "replit.com" 52 | columnsStartAt1 = true 53 | linesStartAt1 = true 54 | locale = "en-us" 55 | pathFormat = "path" 56 | supportsInvalidatedEvent = true 57 | supportsProgressReporting = true 58 | supportsRunInTerminalRequest = true 59 | supportsVariablePaging = true 60 | supportsVariableType = true 61 | 62 | [debugger.interactive.launchMessage] 63 | command = "launch" 64 | type = "request" 65 | 66 | [debugger.interactive.launchMessage.arguments] 67 | args = [] 68 | console = "externalTerminal" 69 | cwd = "." 
70 | environment = [] 71 | pauseForSourceMap = false 72 | program = "./index.js" 73 | request = "launch" 74 | sourceMaps = true 75 | stopOnEntry = false 76 | type = "pwa-node" 77 | 78 | [deployment] 79 | build = ["sh", "-c", "npm run build"] 80 | run = ["sh", "-c", "npm run start"] 81 | 82 | [[ports]] 83 | localPort = 3000 84 | externalPort = 80 -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/ContentView.swift: --------------------------------------------------------------------------------
//
//  ContentView.swift
//  SearchWithBenefits
//
//  Created by Randy Fong on 4/24/24.
//

import SwiftUI

/// Root screen: a status line, a prompt field with search and clear buttons,
/// and the list of books published by the view model.
struct ContentView: View {
    @StateObject var searchWithBenefitsViewModel = SearchWithBenefitsViewModel()
    @State var prompt: String = ""

    var body: some View {
        VStack {
            // Status text from the view model; renders as empty when `message` is nil.
            Text(searchWithBenefitsViewModel.message ?? "").padding()
            HStack {
                TextField(
                    "Enter Prompt",
                    text: $prompt, axis: .vertical).textFieldStyle(.roundedBorder)
                // Search: hand the current prompt to the view model asynchronously.
                Button {
                    Task {
                        await searchWithBenefitsViewModel.getBooks(matching: prompt)
                    }
                } label: {
                    Image(systemName: "magnifyingglass")
                }
                // Clear: reset to a truly empty string (previously a single space,
                // which hid the placeholder and was sent as the next query).
                Button {
                    prompt = ""
                } label: {
                    Image(systemName: "clear")
                }
            }
            .padding()
            // Rows are identified by title, so two books with the same title share an id.
            List(searchWithBenefitsViewModel.books, id: \.title) { book in
                Text(book.title)
                Text(book.description).font(.caption)
            }
        }
    }
}
41 | 42 | /* 43 | 44 | Suggested Prompts to Tryout 45 | 46 | 1. Find similar books to Pride and Prejudice by Jane Austen. 47 | 2. Find science fiction books similar to Dune by Frank Herbert. 48 | 3. Find fantasy novels with a coming-of-age theme. 49 | 4. Recommend dramas similar to Wizard of Oz. 50 | 5. Search for mysteries set in Western Europe. 51 | 6. Discover historical books similar to The Audacity of Hope: Thoughts on Reclaiming the American Dream. 52 | 7. 
Personalize search results based on a college aged woman from Boston majoring in fine arts who enjoys vacations in France. 53 | 8. Find books with a similar writing style to a JR Tolkien. 54 | 9. Search for light-hearted comedies similar to a Tom Hanks movie like Forrest Gump. 55 | 10. Recommend books with a similar emotional impact to Danielle Steel's The Butler. 56 | 11. Assist book clubs by finding thematically linked books to The Anxious Generation: How the Great Rewiring of Childhood Is Causing an Epidemic of Mental Illness. 57 | 12. Personalize children's book recommendations based on a kindergarten reading level. 58 | 59 | */ 60 | -------------------------------------------------------------------------------- /pages/api/recommendations.ts: --------------------------------------------------------------------------------
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
import { NearTextType } from 'types';
import type { NextApiRequest, NextApiResponse } from 'next';
import weaviate, { WeaviateClient, ApiKey } from 'weaviate-ts-client';


/**
 * Book recommendation endpoint.
 *
 * POST: reads `query` (used as the nearText concepts) and `userInterests`
 * (interpolated into the generative prompt) from the request body, runs a
 * nearText search for up to 20 `Book` objects and responds 200 with the raw
 * GraphQL result. When COHERE_API_KEY is set, a per-book generated
 * explanation is requested as well.
 *
 * Any other method is answered with 405; any thrown error is logged and
 * answered with 500. Every path sends a response, so a request never hangs.
 */
export default async function handler(
  req: NextApiRequest,
  res: NextApiResponse
) {
  try {
    const { method } = req;

    // The client takes scheme and host separately, so strip the scheme from the URL.
    const weaviateClusterUrl = process.env.WEAVIATE_CLUSTER_URL?.replace("https://", "")

    switch (method) {

      case 'POST': {
        // Read the body only for POST, and tolerate a missing body instead of
        // throwing while destructuring.
        const { query, userInterests } = req.body ?? {};

        // Forward whichever model-provider keys are configured.
        const headers: { [key: string]: string } = {};

        if (process.env.OPENAI_API_KEY) {
          headers['X-OpenAI-Api-Key'] = process.env.OPENAI_API_KEY;
        }

        if (process.env.COHERE_API_KEY) {
          headers['X-Cohere-Api-Key'] = process.env.COHERE_API_KEY;
        }

        const client: WeaviateClient = weaviate.client({
          scheme: 'https',
          host: weaviateClusterUrl || 'zxzyqcyksbw7ozpm5yowa.c0.us-west2.gcp.weaviate.cloud',
          apiKey: new ApiKey(process.env.WEAVIATE_API_KEY || 'n6mdfI32xrXF3DH76i8Pwc2IajzLZop2igb6'), //READONLY API Key, ensure the environment variable is an Admin key to support writing
          headers: headers,
        });

        const nearText: NearTextType = {
          concepts: query,
          certainty: .6,
        };

        // {title}, {description} and {categories} are left as placeholders in the prompt.
        const generatePrompt = "Briefly describe why this book might be interesting to someone who has interests or hobbies in " + userInterests + ". the book's title is {title}, with a description: {description}, and is in the genre: {categories}. Don't make up anything that wasn't given in this prompt and don't ask how you can help.";

        let recDataBuilder = client.graphql
          .get()
          .withClassName('Book')
          .withFields(
            'title isbn10 isbn13 categories thumbnail description num_pages average_rating published_year authors'
          )
          .withNearText(nearText)
          .withLimit(20);

        // Generation is only requested when a Cohere key is configured.
        if (headers['X-Cohere-Api-Key']) {
          recDataBuilder = recDataBuilder.withGenerate({
            singlePrompt: generatePrompt,
          });
        }

        const recData = await recDataBuilder.do();

        res.status(200).json(recData);
        break;
      }
      default:
        // Setting a status alone never finishes the response; send one explicitly.
        res.setHeader('Allow', 'POST');
        res.status(405).json({ error: `Method ${method} Not Allowed` });
        break;
    }
  } catch (err) {
    console.error(err);
    // Finish the response so the client receives the failure instead of waiting.
    res.status(500).json({ error: 'Failed to fetch recommendations' });
  }
}
-------------------------------------------------------------------------------- /data-pipeline/ollama/README.md: -------------------------------------------------------------------------------- 1 | # Book Recommendations 2 | 3 | ## Weaviate Local and Ollama 4 | 5 | The files contained in `./data-pipeline/ollama` demonstrate how to run Weaviate locally using Docker and configuring Ollama to create vector embeddings and inference. 6 | 7 | ## Environment Setup 8 | 9 | First, you'll need to run Weaviate locally through docker and configure the text2vec_ollama and generative_ollama modules for Weaviate. You can use the docker-compose.yml in this folder. 
10 | 11 | ```bash 12 | docker-compose -f ./data-pipeline/ollama/docker-compose.yml up -d 13 | ``` 14 | 15 | **NOTE**: The docker compose file above only has Ollama modules enabled. If you want to use another vectorizer configuration, you'll need to extend the docker-compose.yml configuration. Details can be found in our [developer docs for using Weaviate with Docker](https://weaviate.io/developers/weaviate/installation/docker-compose). 16 | 17 | You'll also need to install ollama locally and pull two models. Go to [Ollama](https://ollama.com) and download ollama for your operating system. Then pull two models - `llama3:latest` for inference, `snowflake-arctic-embed:latest` for embeddings. 18 | 19 | ``` 20 | ollama pull snowflake-arctic-embed:latest 21 | ollama pull llama3:latest 22 | ``` 23 | 24 | Create a python virtual environment and install dependencies in requirements.txt at the root of this project 25 | ``` 26 | python3 -m venv venv 27 | source venv/bin/activate 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | Then you can begin executing each step to understand how the BookRecs NextJS project interacts with Weaviate. 32 | 33 | ## Steps of Execution 34 | 35 | ```bash 36 | python 1-create_collection.py 37 | ``` 38 | 39 | * This script will create a collection in your locally running Weaviate instance, configured to use Ollama for embeddings and inference. 40 | 41 | ```bash 42 | python 2-populate.py 43 | ``` 44 | 45 | * This script will populate your locally running Weaviate instance with data from the Kaggle dataset referenced in the README at the root directory of this project. 46 | 47 | ```bash 48 | python 3-semantic_search.py 49 | ``` 50 | 51 | * Once executed, the script will make a connection with your locally running Weaviate instance. Once complete, you'll be prompted for user input regarding a query you'd like to apply in the semantic search. The script then uses the Weaviate Client to do the semantic search against the dataset. 
In doing so, Weaviate generates a vector embedding using the configured vectorizer, in this case Ollama - `snowflake-arctic-embed:latest`, and then uses the query embedding to find related vectors. 52 | 53 | ```bash 54 | python 4-generative_search.py 55 | ``` 56 | 57 | * Similar to 3-semantic_search.py, Weaviate applies an additional step where it passes the results over to the configured generative search module to do an inference based on the prompt in `4-generative_search.py`. -------------------------------------------------------------------------------- /.tutorial/02-accounts-and-environment.md: -------------------------------------------------------------------------------- 1 | ## Accounts & Environments 2 | 3 | ### Register on Weaviate Cloud (WCD) 4 | The first step is to register an account on Weaviate Cloud (WCD). If you already have an account, you can skip ahead to creating a dedicated cluster. 5 | 6 | 1. Open your browser and go to [Weaviate Cloud](https://console.weaviate.cloud/) 7 | 2. Register for an account. (Pause here and sign up for an account if you haven't already) 8 | 9 | ### Create a New Cluster 10 | Once you have an account, log in and create a new cluster. 11 | 12 | 1. In the WCD console, click "Create cluster". 13 | 2. You can use a “Free sandbox”, or select Standard, Enterprise, or Business Critical for long-term use. 14 | 3. Give your cluster a memorable name. 15 | 4. Ensure “Enable Authentication” is set to “Yes”. 16 | 5. Review the details and click “Create”. 17 | 18 | WCD will deploy your cluster and provide an endpoint where you can begin storing your embeddings. 19 | 20 | ### Note Cluster URL and API Key 21 | While still in WCD, take note of the Cluster URL and the API Key for your Weaviate instance. 22 | 23 | 1. Click “Details” on the newly created cluster to expand the view. 24 | 2. Copy the “Cluster URL” and save it in a text file for later reference. 25 | 3. Go back to WCD, click API Keys, and copy the Admin key. 
Save this in the text file as well. 26 | 27 | ### Get OpenAI API Key 28 | You will also need an API key from OpenAI to generate embeddings. If you already have a token, you can use it for this project. 29 | 30 | 1. Register for an account on [OpenAI](https://openai.com). 31 | 2. Once logged in, select API. 32 | 3. Go to your avatar at the top right, select “View API Keys”, and create a new secret key. 33 | 4. Give it a name of your choice and create it. 34 | 5. Copy the key and save it in the text file for later reference. 35 | 36 | ### Set Up Your Environment 37 | Next, set up your environment and insert the values into your environment variables. 38 | 39 | - If you are building locally in VSCode, drop the values into a `.env` file and use the following environment variable names as shown on the screen. Source the `.env` file every time you start a new terminal session. 40 | - If you are building in Replit, use the Secrets tool on the bottom left of the screen and insert your environment variables there. You can add the environment variables by editing the JSON and inserting the snippet below, and adding each one manually. 41 | 42 | 43 | ``` 44 | { 45 | "WEAVIATE_API_KEY": "INSERT_API_KEY", 46 | "WEAVIATE_CLUSTER_URL": "INSERT_CLUSTER_URL", 47 | "OPENAI_APIKEY": "INSERT_API_KEY" 48 | } 49 | ``` 50 | 51 | ### Next Steps 52 | Great! Our environment is set up. Next, we will populate the vector database with our objects and their related embeddings. 53 | 54 | --- 55 | *Note: The next section of the tutorial will include detailed steps on populating the vector database.* 56 | -------------------------------------------------------------------------------- /blog-resources/Search-With-Benefits.swiftpm/SearchWithBenefitsViewModel.swift: --------------------------------------------------------------------------------
//
//  SearchWithBenefitsViewModel.swift
//  SearchWithBenefits
//
//  Created by Randy Fong on 4/24/24.
//

import Foundation

/// Envelope of the GraphQL response: `data` -> `Get` -> `Book` array.
/// Property names are capitalised because they must match the response keys.
struct BookResponse: Decodable {
    let data: DataResponse

    struct DataResponse: Decodable {
        let Get: BookGetResponse

        struct BookGetResponse: Decodable {
            let Book: [Book]
        }
    }
}

/// One search hit; only the fields the UI displays are decoded.
struct Book: Decodable {
    let title: String
    let description: String
}

/// Runs a nearText book search against Weaviate and publishes the results
/// (`books`) together with a status line (`message`) for the view.
class SearchWithBenefitsViewModel: ObservableObject {
    @Published var books: [Book] = []
    @Published var message: String? = ""

    /// Searches for books related to `prompt`.
    ///
    /// Publishes "Loading...." while the request is in flight, then either the
    /// books found (with `message` cleared), "Unable to find related books"
    /// for an empty result, or "Search Error" if the request or decoding fails.
    /// All published state is written on the main actor.
    func getBooks(matching prompt: String) async {

        // URL and API Keys
        let urlString = "https://zxzyqcyksbw7ozpm5yowa.c0.us-west2.gcp.weaviate.cloud/v1/graphql"
        let weaviateApiKey = "n6mdfI32xrXF3DH76i8Pwc2IajzLZop2igb6"
        let openAiApiKey = "<< Enter your OpenAI Key here >>"

        let urlRequest: URLRequest = {

            // URL
            let url = URL(string: urlString)!

            // Prompt
            let concepts = [prompt]
            let queryString = formatQuery(prompt: concepts)
            let queryData = queryString.data(using: .utf8)

            // URLRequest
            var urlRequest = URLRequest(url: url)
            urlRequest.httpMethod = "POST"
            urlRequest.setValue("application/json", forHTTPHeaderField: "Content-Type")
            urlRequest.setValue("Bearer \(weaviateApiKey)", forHTTPHeaderField: "Authorization")
            urlRequest.setValue(openAiApiKey, forHTTPHeaderField: "X-OpenAI-Api-Key")
            urlRequest.httpBody = queryData

            return urlRequest
        }()

        do {
            // Loading Display
            await MainActor.run {
                books.removeAll()
                message = "Loading...."
            }

            // Get Books and Decode
            let (data, _) = try await URLSession.shared.data(for: urlRequest)
            let bookResponse = try JSONDecoder().decode(BookResponse.self, from: data)
            let booksFound = bookResponse.data.Get.Book

            // Publish the outcome on the main actor in both cases. The empty
            // case used to write `message` from the background task.
            await MainActor.run {
                books = booksFound
                message = booksFound.isEmpty ? "Unable to find related books" : nil
            }
        } catch {
            // Error Display
            await MainActor.run {
                message = "Search Error"
            }
            print("** Error")
            print(error)
        }
    }
}

/// JSON-encodes `value` and returns the encoded text, or nil if encoding fails.
private func jsonText<T: Encodable>(_ value: T) -> String? {
    let encoder = JSONEncoder()
    // Keep "/" unescaped so the embedded GraphQL text stays readable.
    encoder.outputFormatting = .withoutEscapingSlashes
    guard let data = try? encoder.encode(value) else { return nil }
    return String(decoding: data, as: UTF8.self)
}

// Format Weaviate Query
/// Builds the JSON request body `{"query": "<GraphQL>"}` for a nearText search.
///
/// The concepts are joined with ", " into a single concept string, as before.
/// Both the concept string and the enclosing JSON are produced by an encoder
/// rather than string formatting, so quotes, backslashes and newlines in the
/// prompt can no longer break the request.
private func formatQuery(prompt concepts: [String], limit: Int = 10) -> String {
    let conceptsString = concepts.joined(separator: ", ")
    // A JSON array of strings is used as the GraphQL list literal, e.g. ["dune"].
    let conceptsLiteral = jsonText([conceptsString]) ?? "[]"
    let graphQL = "{Get {Book(limit: \(limit) nearText: {concepts: \(conceptsLiteral)}) {title description _additional {certainty distance}}}}"
    return jsonText(["query": graphQL]) ?? "{}"
}
-------------------------------------------------------------------------------- /components/ui/dialog.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import * as DialogPrimitive from "@radix-ui/react-dialog" 3 | import { X } from "lucide-react" 4 | 5 | import { cn } from "@/lib/utils" 6 | 7 | const Dialog = DialogPrimitive.Root 8 | 9 | const DialogTrigger = DialogPrimitive.Trigger 10 | 11 | const DialogPortal = ({ 12 | className, 13 | ...props 14 | }: DialogPrimitive.DialogPortalProps) => ( 15 | 16 | ) 17 | DialogPortal.displayName = DialogPrimitive.Portal.displayName 18 | 19 | const DialogOverlay = React.forwardRef< 20 | React.ElementRef, 21 | React.ComponentPropsWithoutRef 22 | >(({ className, ...props }, ref) => ( 23 | 31 | )) 32 | 
DialogOverlay.displayName = DialogPrimitive.Overlay.displayName 33 | 34 | const DialogContent = React.forwardRef< 35 | React.ElementRef, 36 | React.ComponentPropsWithoutRef 37 | >(({ className, children, ...props }, ref) => ( 38 | 39 | 40 | 48 | {children} 49 | 50 | 51 | Close 52 | 53 | 54 | 55 | )) 56 | DialogContent.displayName = DialogPrimitive.Content.displayName 57 | 58 | const DialogHeader = ({ 59 | className, 60 | ...props 61 | }: React.HTMLAttributes) => ( 62 |
69 | ) 70 | DialogHeader.displayName = "DialogHeader" 71 | 72 | const DialogFooter = ({ 73 | className, 74 | ...props 75 | }: React.HTMLAttributes) => ( 76 |
83 | ) 84 | DialogFooter.displayName = "DialogFooter" 85 | 86 | const DialogTitle = React.forwardRef< 87 | React.ElementRef, 88 | React.ComponentPropsWithoutRef 89 | >(({ className, ...props }, ref) => ( 90 | 98 | )) 99 | DialogTitle.displayName = DialogPrimitive.Title.displayName 100 | 101 | const DialogDescription = React.forwardRef< 102 | React.ElementRef, 103 | React.ComponentPropsWithoutRef 104 | >(({ className, ...props }, ref) => ( 105 | 110 | )) 111 | DialogDescription.displayName = DialogPrimitive.Description.displayName 112 | 113 | export { 114 | Dialog, 115 | DialogTrigger, 116 | DialogContent, 117 | DialogHeader, 118 | DialogFooter, 119 | DialogTitle, 120 | DialogDescription, 121 | } 122 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.5 2 | aiosignal==1.3.1 3 | alembic==1.13.1 4 | annotated-types==0.6.0 5 | antlr4-python3-runtime==4.9.3 6 | anyio==3.7.1 7 | appnope==0.1.3 8 | argon2-cffi==21.3.0 9 | argon2-cffi-bindings==21.2.0 10 | arrow==1.2.3 11 | asgiref==3.7.2 12 | asttokens==2.2.1 13 | async-lru==2.0.4 14 | async-timeout==4.0.2 15 | attrs==23.1.0 16 | Authlib==1.2.1 17 | Babel==2.12.1 18 | backcall==0.2.0 19 | backoff==2.2.1 20 | beautifulsoup4==4.12.2 21 | bleach==6.0.0 22 | blis==0.7.10 23 | Brotli==1.1.0 24 | catalogue==2.0.9 25 | certifi==2023.7.22 26 | cffi==1.15.1 27 | chardet==5.2.0 28 | charset-normalizer==3.2.0 29 | click==8.1.6 30 | coloredlogs==15.0.1 31 | colorlog==6.8.2 32 | comm==0.1.4 33 | confection==0.1.1 34 | contourpy==1.1.0 35 | cryptography==41.0.3 36 | cycler==0.11.0 37 | cymem==2.0.7 38 | datasets==2.14.7 39 | debugpy==1.6.7 40 | decorator==5.1.1 41 | defusedxml==0.7.1 42 | dill==0.3.7 43 | distro==1.9.0 44 | Django==4.2.7 45 | django-cors-headers==4.3.1 46 | django-phonenumber-field==7.3.0 47 | django-phonenumbers==1.0.1 48 | djangorestframework==3.14.0 49 | 
djangorestframework-simplejwt==5.3.1 50 | dspy-ai==2.4.5 51 | effdet==0.4.1 52 | exceptiongroup==1.1.2 53 | executing==1.2.0 54 | fastai==2.7.12 55 | fastbook==0.0.29 56 | fastcore==1.5.29 57 | fastdownload==0.0.7 58 | fastjsonschema==2.18.0 59 | fastprogress==1.0.3 60 | filelock==3.12.2 61 | filetype==1.2.0 62 | flatbuffers==23.5.26 63 | fonttools==4.42.0 64 | fqdn==1.5.1 65 | frozenlist==1.4.0 66 | fsspec==2023.6.0 67 | graphviz==0.20.1 68 | grpcio==1.62.2 69 | grpcio-health-checking==1.62.2 70 | grpcio-tools==1.62.2 71 | h11==0.14.0 72 | httpcore==1.0.5 73 | httpx==0.27.0 74 | huggingface-hub==0.16.4 75 | humanfriendly==10.0 76 | idna==3.4 77 | importlib-metadata==6.8.0 78 | importlib-resources==6.0.0 79 | iopath==0.1.10 80 | ipykernel==6.25.0 81 | ipython==8.14.0 82 | ipython-genutils==0.2.0 83 | ipywidgets==7.8.0 84 | isoduration==20.11.0 85 | jedi==0.19.0 86 | Jinja2==3.1.2 87 | joblib==1.3.2 88 | json5==0.9.14 89 | jsonpointer==2.4 90 | jsonschema==4.18.6 91 | jsonschema-specifications==2023.7.1 92 | jupyter-events==0.7.0 93 | jupyter-lsp==2.2.0 94 | jupyter_client==8.3.0 95 | jupyter_core==5.3.1 96 | jupyter_server==2.7.0 97 | jupyter_server_terminals==0.4.4 98 | jupyterlab==4.0.4 99 | jupyterlab-pygments==0.2.2 100 | jupyterlab-widgets==1.1.5 101 | jupyterlab_server==2.24.0 102 | kiwisolver==1.4.4 103 | langcodes==3.3.0 104 | layoutparser==0.3.4 105 | lxml==4.9.3 106 | Mako==1.3.3 107 | MarkupSafe==2.1.3 108 | matplotlib==3.7.2 109 | matplotlib-inline==0.1.6 110 | minio==7.2.7 111 | mistune==3.0.1 112 | mpmath==1.3.0 113 | multidict==6.0.4 114 | multiprocess==0.70.15 115 | murmurhash==1.0.9 116 | mutagen==1.47.0 117 | nbclient==0.8.0 118 | nbconvert==7.7.3 119 | nbformat==5.9.2 120 | nest-asyncio==1.5.7 121 | networkx==3.1 122 | nltk==3.8.1 123 | notebook==7.0.2 124 | notebook_shim==0.2.3 125 | numpy==1.25.2 126 | ollama==0.1.9 127 | omegaconf==2.3.0 128 | onnxruntime==1.15.1 129 | openai==1.23.2 130 | opencv-python==4.8.0.76 131 | optuna==3.6.1 132 | 
overrides==7.3.1 133 | packaging==23.1 134 | pandas==2.0.3 135 | pandoc==2.3 136 | pandocfilters==1.5.0 137 | parso==0.8.3 138 | pathy==0.10.2 139 | pdf2image==1.16.3 140 | pdfminer.six==20221105 141 | pdfplumber==0.10.2 142 | pexpect==4.8.0 143 | phonenumbers==8.13.34 144 | pickleshare==0.7.5 145 | Pillow==9.5.0 146 | platformdirs==3.10.0 147 | plumbum==1.8.2 148 | ply==3.11 149 | portalocker==2.7.0 150 | preshed==3.0.8 151 | prometheus-client==0.17.1 152 | prompt-toolkit==3.0.39 153 | protobuf==4.24.0 154 | psutil==5.9.5 155 | ptyprocess==0.7.0 156 | pure-eval==0.2.2 157 | pyarrow==12.0.1 158 | pyarrow-hotfix==0.6 159 | pycocotools==2.0.6 160 | pycparser==2.21 161 | pycryptodome==3.20.0 162 | pycryptodomex==3.20.0 163 | pydantic==2.5.0 164 | pydantic_core==2.14.1 165 | Pygments==2.15.1 166 | PyJWT==2.8.0 167 | pypandoc==1.11 168 | pyparsing==3.0.9 169 | pypdfium2==4.18.0 170 | pytesseract==0.3.10 171 | python-dateutil==2.8.2 172 | python-docx==0.8.11 173 | python-json-logger==2.0.7 174 | python-magic==0.4.27 175 | python-multipart==0.0.6 176 | python-pptx==0.6.21 177 | pytz==2023.3 178 | PyYAML==6.0.1 179 | pyzmq==25.1.0 180 | referencing==0.30.1 181 | regex==2023.6.3 182 | requests==2.31.0 183 | rfc3339-validator==0.1.4 184 | rfc3986-validator==0.1.1 185 | rpds-py==0.9.2 186 | safetensors==0.3.1 187 | scikit-learn==1.3.0 188 | scipy==1.11.1 189 | Send2Trash==1.8.2 190 | sentencepiece==0.1.99 191 | six==1.16.0 192 | smart-open==6.3.0 193 | sniffio==1.3.0 194 | soupsieve==2.4.1 195 | spacy==3.6.0 196 | spacy-legacy==3.0.12 197 | spacy-loggers==1.0.4 198 | SQLAlchemy==2.0.29 199 | sqlparse==0.4.4 200 | srsly==2.4.7 201 | stack-data==0.6.2 202 | sympy==1.12 203 | tabulate==0.9.0 204 | terminado==0.17.1 205 | thinc==8.1.10 206 | threadpoolctl==3.2.0 207 | timm==0.9.5 208 | tinycss2==1.2.1 209 | tokenizers==0.13.3 210 | tomli==2.0.1 211 | torch==2.0.1 212 | torchvision==0.15.2 213 | tornado==6.3.2 214 | tqdm==4.65.0 215 | traitlets==5.9.0 216 | transformers==4.31.0 
217 | typer==0.9.0 218 | typing_extensions==4.7.1 219 | tzdata==2023.3 220 | ujson==5.9.0 221 | unstructured==0.9.1 222 | unstructured-inference==0.5.7 223 | uri-template==1.3.0 224 | urllib3==2.2.1 225 | validators==0.28.1 226 | wasabi==1.1.2 227 | wcwidth==0.2.6 228 | weaviate-client==4.6.3 229 | webcolors==1.13 230 | webencodings==0.5.1 231 | websocket-client==1.6.1 232 | websockets==12.0 233 | widgetsnbextension==3.6.5 234 | XlsxWriter==3.1.2 235 | xmltodict==0.13.0 236 | xxhash==3.3.0 237 | yarl==1.9.2 238 | yt-dlp==2023.12.30 239 | zipp==3.16.2 240 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Book Recommendation System (BookRecs) 2 | [![Weaviate](https://img.shields.io/static/v1?label=powered%20by&message=Weaviate%20%E2%9D%A4&color=green&style=flat-square)](https://weaviate.io/) 3 | [![Demo](https://img.shields.io/badge/Check%20out%20the%20demo!-yellow?&style=flat-square&logo=react&logoColor=white)](https://bookrecs.weaviate.io/) 4 | 5 | 6 | This project is a book recommendation service that suggests books based on a user's inputted genre and book titles. It's built upon a database of 7000 books retrieved from Kaggle. Using Ada v2 as the large language model, vector embeddings were created with the Kaggle dataset to allow for quick vector search to find semantically similar books through natural language input. The frontend is built using Next.js and styled with TailwindCSS. 
7 | 8 | ![Project Screenshot](/BookRecs.gif) 9 | 10 | ## 📑 Table of Contents 11 | 12 | - [Features](#features) 13 | - [Installation](#installation) 14 | - [Usage](#usage) 15 | - [Data Source](#data-source) 16 | - [Tech Stack](#tech-stack) 17 | - [Contributing](#contributing) 18 | - [License](#license) 19 | 20 | ## 💫 Features 21 | 22 | - Input genre and book titles to get book recommendations 23 | - Vector Search on Weaviate Vector database of 7000 books 24 | - Jupyter Notebook workflow to access and store vector embeddings in Weaviate 25 | - Responsive design, thanks to TailwindCSS 26 | - Uses Ollama or OpenAI for vector generation and inference. 27 | 28 | ## 🛠 Installation 29 | 30 | To run the project locally, follow these steps: 31 | 32 | Clone the repository 33 | ``` 34 | git clone https://github.com/weaviate/BookRecs.git 35 | ``` 36 | 37 | ### The Data Pipeline 38 | 39 | The data pipeline shows you how to connect with Weaviate, generate embeddings using integrated modules with Weaviate through OpenAI or Ollama, and then query them using semantic search and vector search. Choose between one of the following options. 40 | 41 | #### Data Pipeline Using Ollama (Option1) 42 | 43 | If you're using Ollama with this project, follow the instructions found in the [ollama/README.md](./data-pipeline/ollama/README.md) to set up Ollama and Weaviate running locally. 44 | 45 | #### Data Pipeline Using OpenAI (Option2) 46 | 47 | If you're using OpenAI with this project, make sure to create a Weaviate Cloud cluster in WCD and get an API key from OpenAI. There are instructions to get an API key from the official [OpenAI Docs](https://platform.openai.com/docs/api-reference/introduction). You'll also need to fund the account. 
48 | 49 | Once you have the above dependencies sorted out, you can follow the instructions in the [openai/README.md](./data-pipeline/openai/README.md) 50 | 51 | ### The Web Application 52 | 53 | Once you've set up Weaviate and understand how the data pipeline works you can move over to the BookRecs web application written in NextJS. 54 | 55 | **Note**: The web application is configured only to use OpenAI and WCD as an introduction on how to leverage Weaviate. It can be modified to use Ollama and a locally running Weaviate instance, but this project won't do that out of the box. 56 | 57 | Additionally, this project has access to an existing WCD Cluster with an API Key configured to only allow READing from the public WCD cluster. 58 | 59 | Install dependencies 60 | ``` 61 | cd bookrecs 62 | npm install 63 | ``` 64 | Run the app 65 | ``` 66 | npm run dev 67 | ``` 68 | Try out BookRecs in a browser at http://localhost:3000 69 | 70 | 71 | ## 🤝 Configuring Cohere Integration 72 | 73 | This project provides book recommendations using a vector database for semantic search. An additional feature is the integration with Cohere through the Weaviate Generative Search module, which provides explanations as to why a user might like a particular book recommendation. 74 | 75 | If you would like to enable this feature, you will need to configure the COHERE_API_KEY and NEXT_PUBLIC_COHERE_CONFIGURED environment variables. 76 | 77 | Steps 78 | 1. Obtain a Cohere API key by signing up on the [Cohere website](https://cohere.com). 79 | 2. Once you have your API key, open the .env file in the root directory of the project. 80 | 3. Add the following line to the file, replacing 'INSERT_OPENAPI_KEY_HERE' with the API key you obtained from Cohere: 81 | ``` 82 | COHERE_API_KEY=INSERT_OPENAPI_KEY_HERE 83 | ``` 84 | 4. To enable the Cohere integration, set the NEXT_PUBLIC_COHERE_CONFIGURED environment variable to "1". 
Add the following line to the .env file: 85 | ``` 86 | NEXT_PUBLIC_COHERE_CONFIGURED=1 87 | ``` 88 | 5. Save the .env file and restart your development server. The Cohere integration should now be enabled. 89 | 90 | Please note that the COHERE_API_KEY should be kept secret and not exposed to the client side of your application. 91 | 92 | 93 | ## 🧰 Usage 94 | 95 | To use the service, simply type in a genre and several book titles in the provided input fields. The system will then generate several book recommendations based on your inputs. 96 | 97 | You can try this at https://bookrecs.weaviate.io 98 | 99 | You must set at least the OPENAI_API_KEY environment variable. You can also set up your own Weaviate cluster and create embeddings yourself. If you choose not to do this, BookRecs will use a Read Only API key for an existing Weaviate cluster containing the Kaggle dataset. 100 | 101 | 102 | ## 💾 Data Source 103 | 104 | The book data used for this project is sourced from the following Kaggle dataset: [7k books with metadata](https://www.kaggle.com/datasets/dylanjcastillo/7k-books-with-metadata). The dataset has been converted to a vector embedding using the sentence-transformer model for improved natural language processing and stored in a Weaviate cluster for fast vector lookups. 105 | 106 | ## 💻 Tech Stack 107 | 108 | - [NodeJS version 18.12.1 or above](https://nodejs.org/) 109 | - [Next.js](https://nextjs.org/) 110 | - [TailwindCSS](https://tailwindcss.com/) 111 | - [Python Data Pipeline](https://python.org/) 112 | - [Weaviate >1.25](https://weaviate.io/) 113 | 114 | ## 🕷 Known Issues 115 | 116 | - Some book images are inaccessible due to dead links on the original data set 117 | 118 | ## 💰 Large Language Model (LLM) Costs with OpenAI 119 | 120 | BookRecs utilizes OpenAI or Ollama models. For OpenAI -- be advised that the usage costs for these models will be billed to the API access key you provide. 
Primarily, costs are incurred during data embedding and answer generation processes. The default vectorization engine for this project is `text-embedding-3-small`. 121 | 122 | ## 💖 Open Source Contribution 123 | 124 | Your contributions are always welcome! Feel free to contribute ideas, feedback, or create issues and bug reports if you find any! Visit our [Weaviate Community Forum](https://forum.weaviate.io/) if you need any help! 125 | 126 | -------------------------------------------------------------------------------- /pages/index.tsx: -------------------------------------------------------------------------------- 1 | import { SyntheticEvent, useState } from 'react'; 2 | import CircleLoader from 'react-spinners/CircleLoader'; 3 | import Modal from 'react-modal'; 4 | import { Book } from 'types'; 5 | import { Input } from "@/components/ui/input"; 6 | import { Button } from "@/components/ui/button"; 7 | import { 8 | Popover, 9 | PopoverContent, 10 | PopoverTrigger, 11 | } from "@/components/ui/popover" 12 | // Centers the `content` box: 50% top/left offsets pulled back by a -50% translate, sized 90% x 80%. 13 | const customStyles = { 14 | content: { 15 | top: '50%', 16 | left: '50%', 17 | right: 'auto', 18 | bottom: 'auto', 19 | marginRight: '-50%', 20 | width: '90%', 21 | height: '80%', 22 | transform: 'translate(-50%, -50%)', 23 | borderRadius: '5px', 24 | }, 25 | }; 26 | /** Home page component holding the query, recommendation list, and selected-book modal state. */ export default function Home() { 27 | const [isLoading, setIsLoading] = useState(false); 28 | const [loadedOnce, setLoadedOnce] = useState(false); 29 | const [query, setQuery] = useState(''); 30 | const [userInterests, setUserInterests] = useState(''); 31 | const [recommendedBooks, setRecommendedBooks] = useState([]); 32 | const [modalIsOpen, setIsOpen] = useState(false); 33 | const [selectedBook, setSelectedbook] = useState(undefined); 34 | 35 | // Selects the first recommended book whose title equals book_title (undefined if none) and flags the modal open. 36 | const openModal = (book_title: string) => { 37 | const bookSelection = recommendedBooks.find((book: Book) => { 38 | return book.title === book_title; 39 | }); 40 | setSelectedbook(bookSelection); 41 | 
setIsOpen(true); 42 | }; 43 | 44 | const closeModal = () => { 45 | setIsOpen(false); 46 | }; 47 | 48 | // Validates the query, POSTs it with userInterests to /api/recommendations, and stores the returned books. 49 | const getRecommendations = async (e: SyntheticEvent) => { 50 | e.preventDefault(); 51 | 52 | // Check Inputs 53 | if (query === '') { 54 | alert("Please let us know what you'd like to learn!"); 55 | return; 56 | } 57 | 58 | setIsLoading(true); 59 | 60 | try { 61 | const res = await fetch('/api/recommendations', { 62 | method: 'POST', 63 | headers: { 'Content-Type': 'application/json' }, 64 | body: JSON.stringify({ 65 | query, 66 | userInterests, 67 | }), 68 | }); 69 | // A failed response carries no book list, so stop before reading the payload. 70 | if (!res.ok) throw new Error(`Recommendation request failed with status ${res.status}`); 71 | const recommendations = await res.json(); 72 | setRecommendedBooks(recommendations?.data?.Get?.Book ?? []); 73 | setLoadedOnce(true); 74 | } catch (error) { 75 | console.error(error); 76 | } finally { 77 | // Runs on success and on failure so the loading state can never get stuck. 78 | setIsLoading(false); 79 | } 80 | }; 81 | 82 | return ( 83 |
84 | 85 | 91 |
92 |

93 | {selectedBook?.title} 94 |

95 | 101 |
102 |
103 |
104 |
105 | {"Thumbnail 110 |
111 |
112 |
113 |

Authors:{' '}{selectedBook?.authors}

114 |

115 | Genre:{' '}{selectedBook?.categories} 116 |

117 |

118 | Rating:{' '}{selectedBook?.average_rating} 119 |

120 |

121 | Publication Year:{' '}{selectedBook?.published_year} 122 |


123 |

{selectedBook?.description}

124 | 125 |
126 | 131 | 135 | 136 |
137 |
138 | 139 |
140 |
141 |
142 |
143 |

144 | Book Recommendations 145 |

146 | 147 |
152 |
153 | 159 | { 167 | setQuery(e.target.value); 168 | }} 169 | /> 170 | {process.env.NEXT_PUBLIC_COHERE_CONFIGURED && ( 171 | <> 172 | 178 | { 186 | setUserInterests(e.target.value); 187 | }} 188 | /> 189 | 190 | )} 191 | 192 |
193 | 196 | 197 |
198 | 199 | {isLoading ? ( 200 |
201 | 208 |
209 | ) : ( 210 | <> 211 | {loadedOnce ? ( 212 | <> 213 |

214 | Recommended Books 215 |

216 | 266 | 267 | ) : ( 268 |
269 | )} 270 | 271 | 272 | )} 273 |
274 | 275 | 276 |
277 | 278 | 284 |
285 | ); 286 | } 287 | --------------------------------------------------------------------------------