├── .env.example ├── .gitignore ├── README.md ├── ch1 ├── js │ ├── a-llm.js │ ├── b-chat.js │ ├── c-system.js │ ├── d-prompt.js │ ├── e-prompt-model.js │ ├── f-chat-prompt.js │ ├── g-chat-prompt-model.js │ ├── h-structured.js │ ├── i-csv.js │ ├── j-methods.js │ ├── k-imperative.js │ ├── ka-stream.js │ └── l-declarative.js └── py │ ├── a-llm.py │ ├── b-chat.py │ ├── c-system.py │ ├── d-prompt.py │ ├── e-prompt-model.py │ ├── f-chat-prompt.py │ ├── g-chat-prompt-model.py │ ├── h-structured.py │ ├── i-csv.py │ ├── j-methods.py │ ├── k-imperative.py │ ├── ka-stream.py │ ├── kb-async.py │ └── l-declarative.py ├── ch10 ├── js │ ├── agent-evaluation-rag.js │ ├── agent-evaluation-sql.js │ ├── agent-sql-graph.js │ ├── create-rag-dataset.js │ ├── create-sql-dataset.js │ ├── rag-graph.js │ ├── retrieve-and-grade.js │ └── search-graph.js └── py │ ├── agent_evaluation_rag.py │ ├── agent_evaluation_sql.py │ ├── agent_sql_graph.py │ ├── create_rag_dataset.py │ ├── create_sql_dataset.py │ ├── rag_graph.py │ ├── retrieve_and_grade.py │ └── search_graph.py ├── ch2 ├── js │ ├── a-text-loader.js │ ├── b-web-loader.js │ ├── c-pdf-loader.js │ ├── d-rec-text-splitter.js │ ├── e-rec-text-splitter-code.js │ ├── f-markdown-splitter.js │ ├── g-embeddings.js │ ├── h-load-split-embed.js │ ├── i-pg-vector.js │ ├── j-record-manager.js │ └── k-multi-vector-retriever.js └── py │ ├── a-text-loader.py │ ├── b-web-loader.py │ ├── c-pdf-loader.py │ ├── d-rec-text-splitter.py │ ├── e-rec-text-splitter-code.py │ ├── f-markdown-splitter.py │ ├── g-embeddings.py │ ├── h-load-split-embed.py │ ├── i-pg-vector.py │ ├── j-record-manager.py │ ├── k-multi-vector-retriever.py │ └── l-rag-colbert.py ├── ch3 ├── js │ ├── a-basic-rag.js │ ├── b-rewrite.js │ ├── c-multi-query.js │ ├── d-rag-fusion.js │ ├── e-hyde.js │ ├── f-router.js │ ├── g-semantic-router.js │ ├── h-self-query.js │ └── i-sql-example.js └── py │ ├── a-basic-rag.py │ ├── b-rewrite.py │ ├── c-multi-query.py │ ├── d-rag-fusion.py │ ├── e-hyde.py │ ├── f-router.py │ ├── g-semantic-router.py │ ├── h-self-query.py │ └── i-sql-example.py ├── ch4 ├── js │ ├── a-simple-memory.js │ ├── b-state-graph.js │ ├── c-persistent-memory.js │ ├── d-trim-messages.js │ ├── e-filter-messages.js │ └── f-merge-messages.js └── py │ ├── a-simple-memory.py │ ├── b-state-graph.py │ ├── c-persistent-memory.py │ ├── d-trim-messages.py │ ├── e-filter-messages.py │ └── f-merge-messages.py ├── ch5 ├── js │ ├── a-chatbot.js │ ├── b-sql-generator.js │ └── c-multi-rag.js └── py │ ├── a-chatbot.py │ ├── b-sql-generator.py │ └── c-multi-rag.py ├── ch6 ├── js │ ├── a-basic-agent.js │ ├── b-force-first-tool.js │ └── c-many-tools.js └── py │ ├── a-basic-agent.py │ ├── b-force-first-tool.py │ └── c-many-tools.py ├── ch7 ├── js │ ├── a-reflection.js │ ├── b-subgraph-direct.js │ ├── c-subgraph-function.js │ └── d-supervisor.js └── py │ ├── a-reflection.py │ ├── b-subgraph-direct.py │ ├── c-subgraph-function.py │ └── d-supervisor.py ├── ch8 ├── js │ ├── a-structured-output.js │ ├── b-streaming-output.js │ ├── c-interrupt.js │ ├── d-authorize.js │ ├── e-resume.js │ ├── f-edit-state.js │ └── g-fork.js └── py │ ├── a-structured-output.py │ ├── b-streaming-output.py │ ├── c-interrupt.py │ ├── d-authorize.py │ ├── e-resume.py │ ├── f-edit-state.py │ └── g-fork.py ├── ch9 ├── README.md ├── js │ ├── .gitignore │ ├── demo.ts │ ├── langgraph.json │ ├── package.json │ ├── src │ │ ├── ingestion_graph │ │ │ ├── configuration.ts │ │ │ ├── graph.ts │ │ │ └── state.ts │ │ ├── retrieval_graph │ │ │ ├── configuration.ts │ │ │ ├── 
graph.ts │ │ │ ├── state.ts │ │ │ └── utils.ts │ │ └── shared │ │ │ ├── configuration.ts │ │ │ ├── retrieval.ts │ │ │ ├── state.ts │ │ │ └── utils.ts │ └── tsconfig.json └── py │ ├── demo.py │ ├── langgraph.json │ ├── pyproject.toml │ └── src │ ├── docSplits.json │ ├── ingestion_graph │ ├── __init__.py │ ├── configuration.py │ ├── graph.py │ └── state.py │ ├── retrieval_graph │ ├── __init__.py │ ├── configuration.py │ ├── graph.py │ ├── state.py │ └── utils.py │ └── shared │ ├── __init__.py │ ├── configuration.py │ ├── retrieval.py │ └── state.py ├── package-lock.json ├── package.json ├── pyproject.toml ├── test.pdf └── test.txt /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | 3 | LANGCHAIN_API_KEY= 4 | 5 | LANGCHAIN_TRACING_V2=true 6 | 7 | LANGCHAIN_PROJECT="learning-langchain" 8 | 9 | ## Supabase keys are only used in Chapter 9 deployment examples 10 | SUPABASE_URL= 11 | 12 | SUPABASE_SERVICE_ROLE_KEY= -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ===== Python ===== 2 | 3 | # Python 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | *.so 8 | .Python 9 | build/ 10 | develop-eggs/ 11 | dist/ 12 | downloads/ 13 | eggs/ 14 | .eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Virtual Environment 26 | venv/ 27 | env/ 28 | ENV/ 29 | .env 30 | .venv/ 31 | 32 | # IDEs and Editors 33 | .idea/ 34 | .vscode/ 35 | *.swp 36 | *.swo 37 | *~ 38 | 39 | # OS generated files 40 | .DS_Store 41 | .DS_Store? 42 | ._* 43 | .Spotlight-V100 44 | .Trashes 45 | ehthumbs.db 46 | Thumbs.db 47 | 48 | # Logs and databases 49 | *.log 50 | *.sqlite 51 | *.db 52 | 53 | # Coverage and testing 54 | .coverage 55 | htmlcov/ 56 | .tox/ 57 | .pytest_cache/ 58 | coverage.xml 59 | *.cover 60 | 61 | # Documentation 62 | docs/_build/ 63 | 64 | 65 | # ===== JavaScript ===== 66 | 67 | # Node modules 68 | node_modules/ 69 | 70 | # Optional npm cache 71 | .npm 72 | 73 | # Optional eslint cache 74 | .eslintcache 75 | 76 | # Optional REPL history 77 | .node_repl_history 78 | 79 | # Output of 'npm pack' 80 | *.tgz 81 | 82 | # Yarn 83 | .yarn/ 84 | .yarn-integrity 85 | 86 | # dotenv environment variables 87 | .env 88 | .env.test 89 | .env.production 90 | 91 | # Build output 92 | dist/ 93 | build/ 94 | 95 | # Logs 96 | logs/ 97 | 98 | # VS Code 99 | .vscode/ 100 | 101 | # IntelliJ/WebStorm 102 | .idea/ 103 | 104 | # Jest coverage 105 | coverage/ 106 | -------------------------------------------------------------------------------- /ch1/js/a-llm.js: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from '@langchain/openai'; 2 | 3 | const model = new ChatOpenAI({ model: 'gpt-3.5-turbo' }); 4 | 5 | const response = await model.invoke('The sky is'); 6 | console.log(response); 7 | -------------------------------------------------------------------------------- /ch1/js/b-chat.js: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from '@langchain/openai'; 2 | import { HumanMessage } from '@langchain/core/messages'; 3 | 4 | const model = new ChatOpenAI(); 5 | const prompt = [new HumanMessage('What is the capital of France?')]; 6 | 7 | const response = await model.invoke(prompt); 8 | console.log(response); 9 | 
-------------------------------------------------------------------------------- /ch1/js/c-system.js: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from '@langchain/openai'; 2 | import { HumanMessage, SystemMessage } from '@langchain/core/messages'; 3 | 4 | const model = new ChatOpenAI(); 5 | const prompt = [ 6 | new SystemMessage( 7 | 'You are a helpful assistant that responds to questions with three exclamation marks.' 8 | ), 9 | new HumanMessage('What is the capital of France?'), 10 | ]; 11 | 12 | const response = await model.invoke(prompt); 13 | console.log(response); 14 | -------------------------------------------------------------------------------- /ch1/js/d-prompt.js: -------------------------------------------------------------------------------- 1 | import { PromptTemplate } from '@langchain/core/prompts'; 2 | 3 | const template = 4 | PromptTemplate.fromTemplate(`Answer the question based on the context below. If the question cannot be answered using the information provided, answer with "I don't know". 5 | 6 | Context: {context} 7 | 8 | Question: {question} 9 | 10 | Answer: `); 11 | 12 | const response = await template.invoke({ 13 | context: 14 | "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.", 15 | question: 'Which model providers offer LLMs?', 16 | }); 17 | 18 | console.log(response); 19 | -------------------------------------------------------------------------------- /ch1/js/e-prompt-model.js: -------------------------------------------------------------------------------- 1 | import { PromptTemplate } from '@langchain/core/prompts'; 2 | import { OpenAI } from '@langchain/openai'; 3 | 4 | const model = new OpenAI({ 5 | model: 'gpt-3.5-turbo', 6 | }); 7 | const template = 8 | PromptTemplate.fromTemplate(`Answer the question based on the context below. If the question cannot be answered using the information provided, answer with "I don't know". 9 | 10 | Context: {context} 11 | 12 | Question: {question} 13 | 14 | Answer: `); 15 | 16 | const prompt = await template.invoke({ 17 | context: 18 | "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.", 19 | question: 'Which model providers offer LLMs?', 20 | }); 21 | 22 | const response = await model.invoke(prompt); 23 | console.log(response); 24 | -------------------------------------------------------------------------------- /ch1/js/f-chat-prompt.js: -------------------------------------------------------------------------------- 1 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 2 | 3 | const template = ChatPromptTemplate.fromMessages([ 4 | [ 5 | 'system', 6 | 'Answer the question based on the context below. 
If the question cannot be answered using the information provided, answer with "I don\'t know".', 7 | ], 8 | ['human', 'Context: {context}'], 9 | ['human', 'Question: {question}'], 10 | ]); 11 | 12 | const response = await template.invoke({ 13 | context: 14 | "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.", 15 | question: 'Which model providers offer LLMs?', 16 | }); 17 | console.log(response); 18 | -------------------------------------------------------------------------------- /ch1/js/g-chat-prompt-model.js: -------------------------------------------------------------------------------- 1 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 2 | import { ChatOpenAI } from '@langchain/openai'; 3 | 4 | const model = new ChatOpenAI(); 5 | const template = ChatPromptTemplate.fromMessages([ 6 | [ 7 | 'system', 8 | 'Answer the question based on the context below. If the question cannot be answered using the information provided, answer with "I don\'t know".', 9 | ], 10 | ['human', 'Context: {context}'], 11 | ['human', 'Question: {question}'], 12 | ]); 13 | 14 | const prompt = await template.invoke({ 15 | context: 16 | "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.", 17 | question: 'Which model providers offer LLMs?', 18 | }); 19 | 20 | const response = await model.invoke(prompt); 21 | console.log(response); 22 | -------------------------------------------------------------------------------- /ch1/js/h-structured.js: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from '@langchain/openai'; 2 | import { z } from 'zod'; 3 | 4 | const answerSchema = z 5 | .object({ 6 | answer: z.string().describe("The answer to the user's question"), 7 | justification: z.string().describe('Justification for the answer'), 8 | }) 9 | .describe( 10 | "An answer to the user's question along with justification for the answer." 
11 | ); 12 | 13 | const model = new ChatOpenAI({ 14 | model: 'gpt-3.5-turbo', 15 | temperature: 0, 16 | }).withStructuredOutput(answerSchema); 17 | 18 | const response = await model.invoke( 19 | 'What weighs more, a pound of bricks or a pound of feathers' 20 | ); 21 | console.log(response); 22 | -------------------------------------------------------------------------------- /ch1/js/i-csv.js: -------------------------------------------------------------------------------- 1 | import { CommaSeparatedListOutputParser } from '@langchain/core/output_parsers'; 2 | 3 | const parser = new CommaSeparatedListOutputParser(); 4 | 5 | const response = await parser.invoke('apple, banana, cherry'); 6 | console.log(response); 7 | -------------------------------------------------------------------------------- /ch1/js/j-methods.js: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from '@langchain/openai'; 2 | 3 | const model = new ChatOpenAI(); 4 | 5 | const response = await model.invoke('Hi there!'); 6 | console.log(response); 7 | // Hi! 8 | 9 | const completions = await model.batch(['Hi there!', 'Bye!']); 10 | // ['Hi!', 'See you!'] 11 | 12 | for await (const token of await model.stream('Bye!')) { 13 | console.log(token); 14 | // Good 15 | // bye 16 | // ! 17 | } 18 | -------------------------------------------------------------------------------- /ch1/js/k-imperative.js: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from '@langchain/openai'; 2 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 3 | import { RunnableLambda } from '@langchain/core/runnables'; 4 | 5 | // the building blocks 6 | 7 | const template = ChatPromptTemplate.fromMessages([ 8 | ['system', 'You are a helpful assistant.'], 9 | ['human', '{question}'], 10 | ]); 11 | 12 | const model = new ChatOpenAI({ 13 | model: 'gpt-3.5-turbo', 14 | }); 15 | 16 | // combine them in a function 17 | // RunnableLambda adds the same Runnable interface for any function you write 18 | 19 | const chatbot = RunnableLambda.from(async (values) => { 20 | const prompt = await template.invoke(values); 21 | return await model.invoke(prompt); 22 | }); 23 | 24 | // use it 25 | 26 | const response = await chatbot.invoke({ 27 | question: 'Which model providers offer LLMs?', 28 | }); 29 | console.log(response); 30 | -------------------------------------------------------------------------------- /ch1/js/ka-stream.js: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from '@langchain/openai'; 2 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 3 | import { RunnableLambda } from '@langchain/core/runnables'; 4 | 5 | const template = ChatPromptTemplate.fromMessages([ 6 | ['system', 'You are a helpful assistant.'], 7 | ['human', '{question}'], 8 | ]); 9 | 10 | const model = new ChatOpenAI({ 11 | model: 'gpt-3.5-turbo', 12 | }); 13 | 14 | const chatbot = RunnableLambda.from(async function* (values) { 15 | const prompt = await template.invoke(values); 16 | for await (const token of await model.stream(prompt)) { 17 | yield token; 18 | } 19 | }); 20 | 21 | for await (const token of await chatbot.stream({ 22 | question: 'Which model providers offer LLMs?', 23 | })) { 24 | console.log(token); 25 | } 26 | -------------------------------------------------------------------------------- /ch1/js/l-declarative.js: 
-------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from '@langchain/openai'; 2 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 3 | import { RunnableLambda } from '@langchain/core/runnables'; 4 | 5 | // the building blocks 6 | 7 | const template = ChatPromptTemplate.fromMessages([ 8 | ['system', 'You are a helpful assistant.'], 9 | ['human', '{question}'], 10 | ]); 11 | 12 | const model = new ChatOpenAI({ 13 | model: 'gpt-3.5-turbo', 14 | }); 15 | 16 | // combine them in a function 17 | 18 | const chatbot = template.pipe(model); 19 | 20 | // use it 21 | 22 | const response = await chatbot.invoke({ 23 | question: 'Which model providers offer LLMs?', 24 | }); 25 | 26 | console.log(response); 27 | 28 | //streaming 29 | 30 | for await (const part of chatbot.stream({ 31 | question: 'Which model providers offer LLMs?', 32 | })) { 33 | console.log(part); 34 | } 35 | -------------------------------------------------------------------------------- /ch1/py/a-llm.py: -------------------------------------------------------------------------------- 1 | from langchain_openai.chat_models import ChatOpenAI 2 | 3 | model = ChatOpenAI(model="gpt-3.5-turbo") 4 | 5 | response = model.invoke("The sky is") 6 | print(response.content) 7 | -------------------------------------------------------------------------------- /ch1/py/b-chat.py: -------------------------------------------------------------------------------- 1 | from langchain_openai.chat_models import ChatOpenAI 2 | from langchain_core.messages import HumanMessage 3 | 4 | model = ChatOpenAI() 5 | prompt = [HumanMessage("What is the capital of France?")] 6 | 7 | response = model.invoke(prompt) 8 | print(response.content) 9 | -------------------------------------------------------------------------------- /ch1/py/c-system.py: -------------------------------------------------------------------------------- 1 | from langchain_core.messages import HumanMessage, SystemMessage 2 | from langchain_openai.chat_models import ChatOpenAI 3 | 4 | model = ChatOpenAI() 5 | system_msg = SystemMessage( 6 | "You are a helpful assistant that responds to questions with three exclamation marks." 7 | ) 8 | human_msg = HumanMessage("What is the capital of France?") 9 | 10 | response = model.invoke([system_msg, human_msg]) 11 | print(response.content) 12 | -------------------------------------------------------------------------------- /ch1/py/d-prompt.py: -------------------------------------------------------------------------------- 1 | from langchain_core.prompts import PromptTemplate 2 | 3 | template = PromptTemplate.from_template("""Answer the question based on the context below. If the question cannot be answered using the information provided, answer with "I don't know". 4 | 5 | Context: {context} 6 | 7 | Question: {question} 8 | 9 | Answer: """) 10 | 11 | response = template.invoke( 12 | { 13 | "context": "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. 
Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.", 14 | "question": "Which model providers offer LLMs?", 15 | } 16 | ) 17 | 18 | print(response) 19 | -------------------------------------------------------------------------------- /ch1/py/e-prompt-model.py: -------------------------------------------------------------------------------- 1 | from langchain_openai.chat_models import ChatOpenAI 2 | from langchain_core.prompts import PromptTemplate 3 | 4 | # both `template` and `model` can be reused many times 5 | 6 | template = PromptTemplate.from_template("""Answer the question based on the context below. If the question cannot be answered using the information provided, answer with "I don't know". 7 | 8 | Context: {context} 9 | 10 | Question: {question} 11 | 12 | Answer: """) 13 | 14 | model = ChatOpenAI(model="gpt-3.5-turbo") 15 | 16 | # `prompt` and `completion` are the results of using template and model once 17 | 18 | prompt = template.invoke( 19 | { 20 | "context": "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.", 21 | "question": "Which model providers offer LLMs?", 22 | } 23 | ) 24 | 25 | response = model.invoke(prompt) 26 | print(response) 27 | -------------------------------------------------------------------------------- /ch1/py/f-chat-prompt.py: -------------------------------------------------------------------------------- 1 | from langchain_core.prompts import ChatPromptTemplate 2 | 3 | template = ChatPromptTemplate.from_messages( 4 | [ 5 | ( 6 | "system", 7 | 'Answer the question based on the context below. If the question cannot be answered using the information provided, answer with "I don\'t know".', 8 | ), 9 | ("human", "Context: {context}"), 10 | ("human", "Question: {question}"), 11 | ] 12 | ) 13 | 14 | response = template.invoke( 15 | { 16 | "context": "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.", 17 | "question": "Which model providers offer LLMs?", 18 | } 19 | ) 20 | 21 | print(response) 22 | -------------------------------------------------------------------------------- /ch1/py/g-chat-prompt-model.py: -------------------------------------------------------------------------------- 1 | from langchain_openai.chat_models import ChatOpenAI 2 | from langchain_core.prompts import ChatPromptTemplate 3 | 4 | # both `template` and `model` can be reused many times 5 | 6 | template = ChatPromptTemplate.from_messages( 7 | [ 8 | ( 9 | "system", 10 | 'Answer the question based on the context below. 
If the question cannot be answered using the information provided, answer with "I don\'t know".', 11 | ), 12 | ("human", "Context: {context}"), 13 | ("human", "Question: {question}"), 14 | ] 15 | ) 16 | 17 | model = ChatOpenAI() 18 | 19 | # `prompt` and `completion` are the results of using template and model once 20 | 21 | prompt = template.invoke( 22 | { 23 | "context": "The most recent advancements in NLP are being driven by Large Language Models (LLMs). These models outperform their smaller counterparts and have become invaluable for developers who are creating applications with NLP capabilities. Developers can tap into these models through Hugging Face's `transformers` library, or by utilizing OpenAI and Cohere's offerings through the `openai` and `cohere` libraries, respectively.", 24 | "question": "Which model providers offer LLMs?", 25 | } 26 | ) 27 | 28 | print(model.invoke(prompt)) 29 | -------------------------------------------------------------------------------- /ch1/py/h-structured.py: -------------------------------------------------------------------------------- 1 | from langchain_openai import ChatOpenAI 2 | from pydantic import BaseModel 3 | 4 | 5 | class AnswerWithJustification(BaseModel): 6 | """An answer to the user's question along with justification for the answer.""" 7 | 8 | answer: str 9 | """The answer to the user's question""" 10 | justification: str 11 | """Justification for the answer""" 12 | 13 | 14 | llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0) 15 | structured_llm = llm.with_structured_output(AnswerWithJustification) 16 | 17 | response = structured_llm.invoke( 18 | "What weighs more, a pound of bricks or a pound of feathers") 19 | print(response) 20 | -------------------------------------------------------------------------------- /ch1/py/i-csv.py: -------------------------------------------------------------------------------- 1 | from langchain_core.output_parsers import CommaSeparatedListOutputParser 2 | 3 | parser = CommaSeparatedListOutputParser() 4 | 5 | response = parser.invoke("apple, banana, cherry") 6 | print(response) 7 | -------------------------------------------------------------------------------- /ch1/py/j-methods.py: -------------------------------------------------------------------------------- 1 | from langchain_openai.chat_models import ChatOpenAI 2 | 3 | model = ChatOpenAI(model="gpt-3.5-turbo") 4 | 5 | completion = model.invoke("Hi there!") 6 | # Hi! 7 | 8 | completions = model.batch(["Hi there!", "Bye!"]) 9 | # ['Hi!', 'See you!'] 10 | 11 | for token in model.stream("Bye!"): 12 | print(token) 13 | # Good 14 | # bye 15 | # ! 
16 | -------------------------------------------------------------------------------- /ch1/py/k-imperative.py: -------------------------------------------------------------------------------- 1 | from langchain_openai.chat_models import ChatOpenAI 2 | from langchain_core.prompts import ChatPromptTemplate 3 | from langchain_core.runnables import chain 4 | 5 | # the building blocks 6 | 7 | template = ChatPromptTemplate.from_messages( 8 | [ 9 | ("system", "You are a helpful assistant."), 10 | ("human", "{question}"), 11 | ] 12 | ) 13 | 14 | model = ChatOpenAI(model="gpt-3.5-turbo") 15 | 16 | # combine them in a function 17 | # @chain decorator adds the same Runnable interface for any function you write 18 | 19 | 20 | @chain 21 | def chatbot(values): 22 | prompt = template.invoke(values) 23 | return model.invoke(prompt) 24 | 25 | 26 | # use it 27 | 28 | response = chatbot.invoke({"question": "Which model providers offer LLMs?"}) 29 | print(response.content) 30 | -------------------------------------------------------------------------------- /ch1/py/ka-stream.py: -------------------------------------------------------------------------------- 1 | from langchain_core.runnables import chain 2 | from langchain_openai.chat_models import ChatOpenAI 3 | from langchain_core.prompts import ChatPromptTemplate 4 | 5 | 6 | model = ChatOpenAI(model="gpt-3.5-turbo") 7 | 8 | 9 | template = ChatPromptTemplate.from_messages( 10 | [ 11 | ("system", "You are a helpful assistant."), 12 | ("human", "{question}"), 13 | ] 14 | ) 15 | 16 | 17 | @chain 18 | def chatbot(values): 19 | prompt = template.invoke(values) 20 | for token in model.stream(prompt): 21 | yield token 22 | 23 | 24 | for part in chatbot.stream({"question": "Which model providers offer LLMs?"}): 25 | print(part) 26 | -------------------------------------------------------------------------------- /ch1/py/kb-async.py: -------------------------------------------------------------------------------- 1 | from langchain_core.runnables import chain 2 | from langchain_openai.chat_models import ChatOpenAI 3 | from langchain_core.prompts import ChatPromptTemplate 4 | 5 | template = ChatPromptTemplate.from_messages( 6 | [ 7 | ("system", "You are a helpful assistant."), 8 | ("human", "{question}"), 9 | ] 10 | ) 11 | 12 | model = ChatOpenAI(model="gpt-3.5-turbo") 13 | 14 | 15 | @chain 16 | async def chatbot(values): 17 | prompt = await template.ainvoke(values) 18 | return await model.ainvoke(prompt) 19 | 20 | 21 | async def main(): 22 | return await chatbot.ainvoke({"question": "Which model providers offer LLMs?"}) 23 | 24 | if __name__ == "__main__": 25 | import asyncio 26 | print(asyncio.run(main())) 27 | -------------------------------------------------------------------------------- /ch1/py/l-declarative.py: -------------------------------------------------------------------------------- 1 | from langchain_openai.chat_models import ChatOpenAI 2 | from langchain_core.prompts import ChatPromptTemplate 3 | 4 | # the building blocks 5 | 6 | template = ChatPromptTemplate.from_messages( 7 | [ 8 | ("system", "You are a helpful assistant."), 9 | ("human", "{question}"), 10 | ] 11 | ) 12 | 13 | model = ChatOpenAI() 14 | 15 | # combine them with the | operator 16 | 17 | chatbot = template | model 18 | 19 | # use it 20 | 21 | response = chatbot.invoke({"question": "Which model providers offer LLMs?"}) 22 | print(response.content) 23 | 24 | # streaming 25 | 26 | for part in chatbot.stream({"question": "Which model providers offer LLMs?"}): 27 | print(part) 28 | 
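# --- illustrative addition (not part of the original l-declarative.py) ---
# The composed chain is itself a Runnable, so the batch method shown in j-methods.py works here too:
# responses = chatbot.batch([{"question": "Which model providers offer LLMs?"}, {"question": "What is an LLM?"}])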
-------------------------------------------------------------------------------- /ch10/js/agent-evaluation-rag.js: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from '@langchain/openai'; 2 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 3 | import { evaluate } from 'langsmith/evaluation'; 4 | import { traceable } from 'langsmith/traceable'; 5 | import { graph } from './rag-graph.js'; 6 | import { z } from 'zod'; 7 | 8 | const defaultDataset = 'langchain-blogs-qa'; 9 | 10 | const experimentPrefix = 'langchain-blogs-qa-evals'; 11 | 12 | const llm = new ChatOpenAI({ model: 'gpt-4o', temperature: 0 }); 13 | 14 | const EVALUATION_PROMPT = `You are a teacher grading a quiz. 15 | 16 | You will be given a QUESTION, the GROUND TRUTH (correct) RESPONSE, and the STUDENT RESPONSE. 17 | 18 | Here is the grade criteria to follow: 19 | (1) Grade the student responses based ONLY on their factual accuracy relative to the ground truth answer. 20 | (2) Ensure that the student response does not contain any conflicting statements. 21 | (3) It is OK if the student response contains more information than the ground truth response, as long as it is factually accurate relative to the ground truth response. 22 | 23 | Correctness: 24 | True means that the student's response meets all of the criteria. 25 | False means that the student's response does not meet all of the criteria. 26 | 27 | Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct.`; 28 | 29 | const userPrompt = `QUESTION: {question} 30 | GROUND TRUTH RESPONSE: {reference} 31 | STUDENT RESPONSE: {answer}`; 32 | 33 | const prompt = ChatPromptTemplate.fromMessages([ 34 | ['system', EVALUATION_PROMPT], 35 | ['user', userPrompt], 36 | ]); 37 | 38 | // LLM-as-judge output schema 39 | 40 | const grade = z 41 | .object({ 42 | reasoning: z 43 | .string() 44 | .describe( 45 | 'Explain your reasoning for whether the actual response is correct or not.' 46 | ), 47 | isCorrect: z 48 | .boolean() 49 | .describe( 50 | 'True if the student response is mostly or exactly correct, otherwise False.' 51 | ), 52 | }) 53 | .describe( 54 | 'Compare the expected and actual answers and grade the actual answer.' 
55 | ); 56 | 57 | const graderLlm = prompt.pipe(llm.withStructuredOutput(grade)); 58 | 59 | const evaluateAgent = async (run, example) => { 60 | const question = run.inputs.question; 61 | const answer = run.outputs.answer; 62 | const reference = example.outputs.answer; 63 | 64 | const grade = await graderLlm.invoke({ question, reference, answer }); 65 | const isCorrect = grade.isCorrect; 66 | 67 | return { key: 'correct', score: Number(isCorrect) }; 68 | }; 69 | 70 | const runGraph = traceable(async (inputs) => { 71 | const answer = await graph.invoke({ question: inputs.question }); 72 | return { answer: answer.answer }; 73 | }); 74 | 75 | await evaluate((inputs) => runGraph(inputs), { 76 | data: defaultDataset, 77 | evaluators: [evaluateAgent], 78 | experimentPrefix, 79 | maxConcurrency: 4, 80 | }); 81 | -------------------------------------------------------------------------------- /ch10/js/create-rag-dataset.js: -------------------------------------------------------------------------------- 1 | import { Client } from 'langsmith'; 2 | const client = new Client(); 3 | 4 | const exampleInputs = [ 5 | [ 6 | 'Which companies are highlighted as top LangGraph agent adopters in 2024?', 7 | 'The top adopters include Uber (code migration tools), AppFolio (property management copilot), LinkedIn (SQL Bot), Elastic (AI assistant), and Replit (multi-agent development platform) :cite[3].', 8 | ], 9 | [ 10 | "How did AppFolio's AI copilot impact property managers?", 11 | "AppFolio's Realm-X AI copilot saved property managers over 10 hours per week by automating queries, bulk actions, and scheduling :cite[3].", 12 | ], 13 | [ 14 | 'What infrastructure trends dominated LLM usage in 2024?', 15 | 'OpenAI remained the top LLM provider (6x more usage than Ollama), while open-source models via Ollama and Groq surged. 
Chroma and FAISS led vector stores, with MongoDB and Elastic gaining traction :cite[2]:cite[5].', 16 | ], 17 | [ 18 | 'How did LangGraph improve agent workflows compared to 2023?', 19 | 'LangGraph usage grew to 43% of LangSmith organizations, with 21.9% of traces involving tool calls (up from 0.5% in 2023), enabling complex multi-step tasks like database writes :cite[2]:cite[7].', 20 | ], 21 | [ 22 | "What distinguishes Replit's LangGraph implementation?", 23 | "Replit's agent emphasizes human-in-the-loop validation and a multi-agent architecture for code generation, combining autonomy with controlled outputs :cite[3].", 24 | ], 25 | ]; 26 | 27 | const datasetName = 'langchain-blogs-qa'; 28 | 29 | // Create dataset 30 | const dataset = await client.createDataset(datasetName, { 31 | description: 'Langchain blogs QA.', 32 | }); 33 | 34 | // Prepare inputs, outputs, and metadata for bulk creation 35 | const inputs = exampleInputs.map(([inputPrompt]) => ({ 36 | question: inputPrompt, 37 | })); 38 | 39 | const outputs = exampleInputs.map(([, outputAnswer]) => ({ 40 | answer: outputAnswer, 41 | })); 42 | 43 | const metadata = exampleInputs.map(() => ({ source: 'LangChain Blog' })); 44 | 45 | // Use the bulk createExamples method 46 | await client.createExamples({ 47 | inputs, 48 | outputs, 49 | metadata, 50 | datasetId: dataset.id, 51 | }); 52 | 53 | console.log( 54 | `Dataset created in langsmith with ID: ${dataset.id}\n Navigate to ${dataset.url}.` 55 | ); 56 | -------------------------------------------------------------------------------- /ch10/js/create-sql-dataset.js: -------------------------------------------------------------------------------- 1 | import { Client } from 'langsmith'; 2 | const client = new Client(); 3 | 4 | const exampleInputs = [ 5 | [ 6 | "Which country's customers spent the most? 
And how much did they spend?', 7 | 'The country whose customers spent the most is the USA, with a total expenditure of $523.06', 8 | ], 9 | [ 10 | 'What was the most purchased track of 2013?', 11 | 'The most purchased track of 2013 was Hot Girl.', 12 | ], 13 | [ 14 | 'How many albums does the artist Led Zeppelin have?', 15 | 'Led Zeppelin has 14 albums', 16 | ], 17 | [ 18 | "What is the total price for the album 'Big Ones'?", 19 | 'The total price for the album "Big Ones" is 14.85', 20 | ], 21 | [ 22 | 'Which sales agent made the most in sales in 2009?', 23 | 'Steve Johnson made the most sales in 2009', 24 | ], 25 | ]; 26 | 27 | const datasetName = 'sql-agent-response'; 28 | 29 | if (!(await client.hasDataset({ datasetName }))) { 30 | const dataset = await client.createDataset(datasetName); 31 | 32 | // Prepare inputs, outputs, and metadata for bulk creation 33 | const inputs = exampleInputs.map(([inputPrompt]) => ({ 34 | question: inputPrompt, 35 | })); 36 | 37 | const outputs = exampleInputs.map(([, outputAnswer]) => ({ 38 | answer: outputAnswer, 39 | })); 40 | 41 | await client.createExamples({ 42 | inputs, 43 | outputs, 44 | datasetId: dataset.id, 45 | }); 46 | } 47 | -------------------------------------------------------------------------------- /ch10/js/rag-graph.js: -------------------------------------------------------------------------------- 1 | import { Annotation, StateGraph } from '@langchain/langgraph'; 2 | import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio'; 3 | import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; 4 | import { MemoryVectorStore } from 'langchain/vectorstores/memory'; 5 | import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai'; 6 | import * as hub from 'langchain/hub'; 7 | import { StringOutputParser } from '@langchain/core/output_parsers'; 8 | 9 | const GraphState = Annotation.Root({ 10 | question: Annotation(), 11 | scrapedDocuments: Annotation(), 12 | vectorstore: Annotation(), 13 | answer: Annotation(), 14 | }); 15 | 16 | const scrapeBlogPosts = async (state) => { 17 | const urls = [ 18 | 'https://blog.langchain.dev/top-5-langgraph-agents-in-production-2024/', 19 | 'https://blog.langchain.dev/langchain-state-of-ai-2024/', 20 | 'https://blog.langchain.dev/introducing-ambient-agents/', 21 | ]; 22 | 23 | const loadDocs = async (urls) => { 24 | const docs = []; 25 | for (const url of urls) { 26 | const loader = new CheerioWebBaseLoader(url); 27 | const loadedDocs = await loader.load(); 28 | docs.push(...loadedDocs); 29 | } 30 | return docs; 31 | }; 32 | 33 | const scrapedDocuments = await loadDocs(urls); 34 | 35 | return { scrapedDocuments }; 36 | }; 37 | 38 | const indexing = async (state) => { 39 | const textSplitter = new RecursiveCharacterTextSplitter({ 40 | chunkSize: 1000, 41 | chunkOverlap: 0, 42 | }); 43 | 44 | const docSplits = await textSplitter.splitDocuments(state.scrapedDocuments); 45 | 46 | const vectorstore = new MemoryVectorStore(new OpenAIEmbeddings()); 47 | 48 | await vectorstore.addDocuments(docSplits); 49 | 50 | console.log('vectorstore: ', vectorstore); 51 | 52 | return { vectorstore }; 53 | }; 54 | 55 | const retrieveAndGenerate = async (state) => { 56 | const { question, vectorstore } = state; 57 | 58 | const retriever = vectorstore.asRetriever(); 59 | 60 | const prompt = await hub.pull('rlm/rag-prompt'); 61 | 62 | const llm = new ChatOpenAI({ model: 'gpt-3.5-turbo', temperature: 0 }); 63 | 64 | const docs = await retriever.invoke(question); 65 | 66 | const chain = prompt.pipe(llm).pipe(new 
StringOutputParser()); 67 | 68 | const answer = await chain.invoke({ context: docs, question }); 69 | 70 | console.log('answer: ', answer); 71 | 72 | return { answer }; 73 | }; 74 | 75 | const workflow = new StateGraph(GraphState) 76 | .addNode('retrieve_and_generate', retrieveAndGenerate) 77 | .addNode('scrape_blog_posts', scrapeBlogPosts) 78 | .addNode('indexing', indexing) 79 | .addEdge('__start__', 'scrape_blog_posts') 80 | .addEdge('scrape_blog_posts', 'indexing') 81 | .addEdge('indexing', 'retrieve_and_generate') 82 | .addEdge('retrieve_and_generate', '__end__'); 83 | 84 | const graph = workflow.compile(); 85 | 86 | await graph.invoke({ question: 'What are ambient agents?' }); 87 | -------------------------------------------------------------------------------- /ch10/js/retrieve-and-grade.js: -------------------------------------------------------------------------------- 1 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 2 | import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio'; 3 | import { InMemoryVectorStore } from '@langchain/community/vectorstores/in_memory'; 4 | import { OpenAIEmbeddings } from '@langchain/openai'; 5 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 6 | import { z } from 'zod'; 7 | import { ChatOpenAI } from '@langchain/openai'; 8 | 9 | const urls = [ 10 | 'https://blog.langchain.dev/top-5-langgraph-agents-in-production-2024/', 11 | 'https://blog.langchain.dev/langchain-state-of-ai-2024/', 12 | 'https://blog.langchain.dev/introducing-ambient-agents/', 13 | ]; 14 | 15 | // Load documents from URLs 16 | const loadDocs = async (urls) => { 17 | const docs = []; 18 | for (const url of urls) { 19 | const loader = new CheerioWebBaseLoader(url); 20 | const loadedDocs = await loader.load(); 21 | docs.push(...loadedDocs); 22 | } 23 | return docs; 24 | }; 25 | 26 | const docsList = await loadDocs(urls); 27 | 28 | // Initialize the text splitter 29 | const textSplitter = new RecursiveCharacterTextSplitter({ 30 | chunkSize: 250, 31 | chunkOverlap: 0, 32 | }); 33 | 34 | // Split the documents into smaller chunks 35 | const docSplits = textSplitter.splitDocuments(docsList); 36 | 37 | // Add to vector database 38 | const vectorstore = await InMemoryVectorStore.fromDocuments( 39 | docSplits, 40 | new OpenAIEmbeddings() 41 | ); 42 | 43 | const retriever = vectorstore.asRetriever(); // The `retriever` object can now be used for querying 44 | 45 | const question = 'What are 2 LangGraph agents used in production in 2024?'; 46 | 47 | const docs = retriever.invoke(question); 48 | 49 | console.log('Retrieved documents: \n', docs[0].page_content); 50 | 51 | // Define the schema using Zod 52 | const GradeDocumentsSchema = z.object({ 53 | binary_score: z 54 | .string() 55 | .describe("Documents are relevant to the question, 'yes' or 'no'"), 56 | }); 57 | 58 | // Initialize LLM with structured output using Zod schema 59 | const llm = new ChatOpenAI({ model: 'gpt-3.5-turbo', temperature: 0 }); 60 | const structuredLLMGrader = llm.withStructuredOutput(GradeDocumentsSchema); 61 | 62 | // System and prompt template 63 | const systemMessage = `You are a grader assessing relevance of a retrieved document to a user question. If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. 
Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.`; 64 | const gradePrompt = ChatPromptTemplate.fromMessages([ 65 | { role: 'system', content: systemMessage }, 66 | { 67 | role: 'human', 68 | content: 69 | 'Retrieved document: \n\n {document} \n\n User question: {question}', 70 | }, 71 | ]); 72 | 73 | // Combine prompt with the structured output 74 | const retrievalGrader = gradePrompt.pipe(structuredLLMGrader); 75 | 76 | // Grade retrieved documents 77 | const results = await retrievalGrader.invoke({ 78 | question, 79 | document: docs[0].page_content, 80 | }); 81 | 82 | console.log('\n\nGrading results: \n', results); 83 | -------------------------------------------------------------------------------- /ch10/py/create_rag_dataset.py: -------------------------------------------------------------------------------- 1 | from langsmith import wrappers, Client 2 | from pydantic import BaseModel, Field 3 | from openai import OpenAI 4 | 5 | client = Client() 6 | openai_client = wrappers.wrap_openai(OpenAI()) 7 | 8 | examples = [ 9 | { 10 | "question": "Which companies are highlighted as top LangGraph agent adopters in 2024?", 11 | "answer": "The top adopters include Uber (code migration tools), AppFolio (property management copilot), LinkedIn (SQL Bot), Elastic (AI assistant), and Replit (multi-agent development platform) :cite[3]." 12 | }, 13 | { 14 | "question": "How did AppFolio's AI copilot impact property managers?", 15 | "answer": "AppFolio's Realm-X AI copilot saved property managers over 10 hours per week by automating queries, bulk actions, and scheduling :cite[3]." 16 | }, 17 | { 18 | "question": "What infrastructure trends dominated LLM usage in 2024?", 19 | "answer": "OpenAI remained the top LLM provider (6x more usage than Ollama), while open-source models via Ollama and Groq surged. Chroma and FAISS led vector stores, with MongoDB and Elastic gaining traction :cite[2]:cite[5]." 20 | }, 21 | { 22 | "question": "How did LangGraph improve agent workflows compared to 2023?", 23 | "answer": "LangGraph usage grew to 43% of LangSmith organizations, with 21.9% of traces involving tool calls (up from 0.5% in 2023), enabling complex multi-step tasks like database writes :cite[2]:cite[7]." 24 | }, 25 | { 26 | "question": "What distinguishes Replit's LangGraph implementation?", 27 | "answer": "Replit's agent emphasizes human-in-the-loop validation and a multi-agent architecture for code generation, combining autonomy with controlled outputs :cite[3]." 28 | } 29 | ] 30 | 31 | inputs = [{"question": example["question"]} for example in examples] 32 | outputs = [{"answer": example["answer"]} for example in examples] 33 | 34 | # Programmatically create a dataset in LangSmith 35 | dataset = client.create_dataset( 36 | dataset_name="langchain-blogs-qa", description="Langchain blogs QA." 37 | ) 38 | 39 | # Add examples to the dataset 40 | client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id) 41 | 42 | print( 43 | f"Dataset created in langsmith with ID: {dataset.id}\n Navigate to {dataset.url}.") 44 | -------------------------------------------------------------------------------- /ch10/py/create_sql_dataset.py: -------------------------------------------------------------------------------- 1 | from langsmith import Client 2 | 3 | client = Client() 4 | 5 | # Create a dataset 6 | examples = [ 7 | ("Which country's customers spent the most? 
And how much did they spend?", 8 | "The country whose customers spent the most is the USA, with a total expenditure of $523.06"), 9 | ("What was the most purchased track of 2013?", 10 | "The most purchased track of 2013 was Hot Girl."), 11 | ("How many albums does the artist Led Zeppelin have?", 12 | "Led Zeppelin has 14 albums"), 13 | ("What is the total price for the album “Big Ones”?", 14 | "The total price for the album 'Big Ones' is 14.85"), 15 | ("Which sales agent made the most in sales in 2009?", 16 | "Steve Johnson made the most sales in 2009"), 17 | ] 18 | 19 | dataset_name = "sql-agent-response" 20 | if not client.has_dataset(dataset_name=dataset_name): 21 | dataset = client.create_dataset(dataset_name=dataset_name) 22 | inputs, outputs = zip( 23 | *[({"input": text}, {"output": label}) for text, label in examples] 24 | ) 25 | client.create_examples( 26 | inputs=inputs, outputs=outputs, dataset_id=dataset.id) 27 | -------------------------------------------------------------------------------- /ch10/py/rag_graph.py: -------------------------------------------------------------------------------- 1 | from typing import List, TypedDict 2 | from langchain_community.document_loaders import WebBaseLoader 3 | from langchain.schema import Document 4 | from langgraph.graph import END, StateGraph, START 5 | from langchain_community.vectorstores import InMemoryVectorStore 6 | from langchain_text_splitters import RecursiveCharacterTextSplitter 7 | from langchain_openai import OpenAIEmbeddings 8 | from langchain import hub 9 | from langchain_openai import ChatOpenAI 10 | 11 | 12 | class GraphState(TypedDict): 13 | """ 14 | Represents the state of our graph. 15 | 16 | Attributes: 17 | question: question 18 | scraped_documents: list of documents 19 | vectorstore: vectorstore 20 | """ 21 | 22 | question: str 23 | scraped_documents: List[str] 24 | vectorstore: InMemoryVectorStore 25 | answer: str 26 | 27 | 28 | def scrape_blog_posts(state) -> List[Document]: 29 | """ 30 | Scrape the blog posts and create a list of documents 31 | """ 32 | 33 | urls = [ 34 | "https://blog.langchain.dev/top-5-langgraph-agents-in-production-2024/", 35 | "https://blog.langchain.dev/langchain-state-of-ai-2024/", 36 | "https://blog.langchain.dev/introducing-ambient-agents/", 37 | ] 38 | 39 | docs = [WebBaseLoader(url).load() for url in urls] 40 | docs_list = [item for sublist in docs for item in sublist] 41 | 42 | return {"scraped_documents": docs_list} 43 | 44 | 45 | def indexing(state): 46 | """ 47 | Index the documents 48 | """ 49 | text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( 50 | chunk_size=250, chunk_overlap=0 51 | ) 52 | doc_splits = text_splitter.split_documents(state["scraped_documents"]) 53 | 54 | # Add to vectorDB 55 | vectorstore = InMemoryVectorStore.from_documents( 56 | documents=doc_splits, 57 | embedding=OpenAIEmbeddings(), 58 | ) 59 | return {"vectorstore": vectorstore} 60 | 61 | 62 | def retrieve_and_generate(state): 63 | """ 64 | Retrieve documents from vectorstore and generate answer 65 | """ 66 | question = state["question"] 67 | vectorstore = state["vectorstore"] 68 | 69 | retriever = vectorstore.as_retriever() 70 | 71 | prompt = hub.pull("rlm/rag-prompt") 72 | llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) 73 | 74 | # fetch relevant documents 75 | docs = retriever.invoke(question) # format prompt 76 | formatted = prompt.invoke( 77 | {"context": docs, "question": question}) # generate answer 78 | answer = llm.invoke(formatted) 79 | return {"answer": answer} 
80 | 81 | 82 | # Graph 83 | workflow = StateGraph(GraphState) 84 | 85 | # Define the nodes 86 | workflow.add_node("retrieve_and_generate", retrieve_and_generate) # retrieve 87 | workflow.add_node("scrape_blog_posts", scrape_blog_posts) # scrape web 88 | workflow.add_node("indexing", indexing) # index 89 | 90 | # Build graph 91 | workflow.add_edge(START, "scrape_blog_posts") 92 | workflow.add_edge("scrape_blog_posts", "indexing") 93 | workflow.add_edge("indexing", "retrieve_and_generate") 94 | 95 | workflow.add_edge("retrieve_and_generate", END) 96 | 97 | # Compile 98 | graph = workflow.compile() 99 | -------------------------------------------------------------------------------- /ch10/py/retrieve_and_grade.py: -------------------------------------------------------------------------------- 1 | from langchain.text_splitter import RecursiveCharacterTextSplitter 2 | from langchain_community.document_loaders import WebBaseLoader 3 | from langchain_community.vectorstores import InMemoryVectorStore 4 | from langchain_openai import OpenAIEmbeddings 5 | from langchain_core.prompts import ChatPromptTemplate 6 | from pydantic import BaseModel, Field 7 | from langchain_openai import ChatOpenAI 8 | 9 | 10 | # --- Create an index of documents --- 11 | 12 | urls = [ 13 | "https://blog.langchain.dev/top-5-langgraph-agents-in-production-2024/", 14 | "https://blog.langchain.dev/langchain-state-of-ai-2024/", 15 | "https://blog.langchain.dev/introducing-ambient-agents/", 16 | ] 17 | 18 | docs = [WebBaseLoader(url).load() for url in urls] 19 | docs_list = [item for sublist in docs for item in sublist] 20 | 21 | text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( 22 | chunk_size=250, chunk_overlap=0 23 | ) 24 | doc_splits = text_splitter.split_documents(docs_list) 25 | 26 | # Add to vectorDB 27 | vectorstore = InMemoryVectorStore.from_documents( 28 | documents=doc_splits, 29 | embedding=OpenAIEmbeddings(), 30 | ) 31 | retriever = vectorstore.as_retriever() 32 | 33 | # Retrieve the relevant documents 34 | results = retriever.invoke( 35 | "What are 2 LangGraph agents used in production in 2024?") 36 | 37 | print("Results: \n", results) 38 | 39 | 40 | # --- Create a grader for retrieved documents --- 41 | 42 | # Data model 43 | class GradeDocuments(BaseModel): 44 | """Binary score for relevance check on retrieved documents.""" 45 | 46 | binary_score: str = Field( 47 | description="Documents are relevant to the question, 'yes' or 'no'" 48 | ) 49 | 50 | 51 | # LLM with structured output 52 | llm = ChatOpenAI(temperature=0) 53 | structured_llm_grader = llm.with_structured_output(GradeDocuments) 54 | 55 | # Prompt 56 | system = """You are a grader assessing relevance of a retrieved document to a user question. 57 | If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. 58 | Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.""" 59 | 60 | grade_prompt = ChatPromptTemplate.from_messages( 61 | [ 62 | ("system", system), 63 | ("human", 64 | "Retrieved document: \n\n {document} \n\n User question: {question}"), 65 | ] 66 | ) 67 | 68 | retrieval_grader = grade_prompt | structured_llm_grader 69 | 70 | # --- Grade retrieved documents --- 71 | 72 | question = "What are 2 LangGraph agents used in production in 2024?" 
73 | 74 | # as an example retrieval_grader.invoke({"question": question, "document": doc_txt}) 75 | docs = retriever.invoke(question) 76 | 77 | doc_txt = docs[0].page_content 78 | 79 | result = retrieval_grader.invoke({"question": question, "document": doc_txt}) 80 | 81 | print("\n\nGrade Result: \n", result) 82 | -------------------------------------------------------------------------------- /ch2/js/a-text-loader.js: -------------------------------------------------------------------------------- 1 | import { TextLoader } from 'langchain/document_loaders/fs/text'; 2 | 3 | const loader = new TextLoader('./test.txt'); 4 | const docs = await loader.load(); 5 | 6 | console.log(docs); 7 | -------------------------------------------------------------------------------- /ch2/js/b-web-loader.js: -------------------------------------------------------------------------------- 1 | import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio'; 2 | 3 | const loader = new CheerioWebBaseLoader('https://www.langchain.com/'); 4 | const docs = await loader.load(); 5 | 6 | console.log(docs); 7 | -------------------------------------------------------------------------------- /ch2/js/c-pdf-loader.js: -------------------------------------------------------------------------------- 1 | // install the pdf parsing library: npm install pdf-parse 2 | import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf'; 3 | 4 | const loader = new PDFLoader('./test.pdf'); 5 | const docs = await loader.load(); 6 | 7 | console.log(docs); 8 | -------------------------------------------------------------------------------- /ch2/js/d-rec-text-splitter.js: -------------------------------------------------------------------------------- 1 | import { TextLoader } from 'langchain/document_loaders/fs/text'; 2 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 3 | 4 | const loader = new TextLoader('./test.txt'); // or any other loader 5 | const docs = await loader.load(); 6 | 7 | const splitter = new RecursiveCharacterTextSplitter({ 8 | chunkSize: 1000, 9 | chunkOverlap: 200, 10 | }); 11 | 12 | const splittedDocs = await splitter.splitDocuments(docs); 13 | 14 | console.log(splittedDocs); 15 | -------------------------------------------------------------------------------- /ch2/js/e-rec-text-splitter-code.js: -------------------------------------------------------------------------------- 1 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 2 | 3 | const PYTHON_CODE = ` def hello_world(): print("Hello, World!") # Call the function hello_world() `; 4 | 5 | const pythonSplitter = RecursiveCharacterTextSplitter.fromLanguage('python', { 6 | chunkSize: 50, 7 | chunkOverlap: 0, 8 | }); 9 | 10 | const pythonDocs = await pythonSplitter.createDocuments([PYTHON_CODE]); 11 | 12 | console.log(pythonDocs); 13 | -------------------------------------------------------------------------------- /ch2/js/f-markdown-splitter.js: -------------------------------------------------------------------------------- 1 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 2 | 3 | const markdownText = ` # 🦜🔗 LangChain ⚡ Building applications with LLMs through composability ⚡ ## Quick Install \`\`\`bash pip install langchain \`\`\` As an open source project in a rapidly developing field, we are extremely open to contributions. 
`; 4 | 5 | const mdSplitter = RecursiveCharacterTextSplitter.fromLanguage('markdown', { 6 | chunkSize: 60, 7 | chunkOverlap: 0, 8 | }); 9 | 10 | const mdDocs = await mdSplitter.createDocuments( 11 | [markdownText], 12 | [{ source: 'https://www.langchain.com' }] 13 | ); 14 | 15 | console.log(mdDocs); 16 | -------------------------------------------------------------------------------- /ch2/js/g-embeddings.js: -------------------------------------------------------------------------------- 1 | import { OpenAIEmbeddings } from '@langchain/openai'; 2 | 3 | const model = new OpenAIEmbeddings(); 4 | const embeddings = await model.embedDocuments([ 5 | 'Hi there!', 6 | 'Oh, hello!', 7 | "What's your name?", 8 | 'My friends call me World', 9 | 'Hello World!', 10 | ]); 11 | 12 | console.log(embeddings); 13 | -------------------------------------------------------------------------------- /ch2/js/h-load-split-embed.js: -------------------------------------------------------------------------------- 1 | import { TextLoader } from 'langchain/document_loaders/fs/text'; 2 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 3 | import { OpenAIEmbeddings } from '@langchain/openai'; 4 | 5 | const loader = new TextLoader('./test.txt'); 6 | const docs = await loader.load(); 7 | 8 | // Split the document 9 | const splitter = new RecursiveCharacterTextSplitter({ 10 | chunkSize: 1000, 11 | chunkOverlap: 200, 12 | }); 13 | const chunks = await splitter.splitDocuments(docs); 14 | 15 | console.log(chunks); 16 | 17 | // Generate embeddings 18 | const model = new OpenAIEmbeddings(); 19 | const embeddings = await model.embedDocuments(chunks.map((c) => c.pageContent)); 20 | 21 | console.log(embeddings); 22 | -------------------------------------------------------------------------------- /ch2/js/i-pg-vector.js: -------------------------------------------------------------------------------- 1 | /** 2 | 1. Ensure docker is installed and running (https://docs.docker.com/get-docker/) 3 | 2. Run the following command to start the postgres container: 4 | 5 | docker run \ 6 | --name pgvector-container \ 7 | -e POSTGRES_USER=langchain \ 8 | -e POSTGRES_PASSWORD=langchain \ 9 | -e POSTGRES_DB=langchain \ 10 | -p 6024:5432 \ 11 | -d pgvector/pgvector:pg16 12 | 3. 
Use the connection string below for the postgres container 13 | */ 14 | 15 | import { TextLoader } from 'langchain/document_loaders/fs/text'; 16 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 17 | import { OpenAIEmbeddings } from '@langchain/openai'; 18 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 19 | import { v4 as uuidv4 } from 'uuid'; 20 | 21 | const connectionString = 22 | 'postgresql://langchain:langchain@localhost:6024/langchain'; 23 | // Load the document, split it into chunks 24 | const loader = new TextLoader('./test.txt'); 25 | const raw_docs = await loader.load(); 26 | const splitter = new RecursiveCharacterTextSplitter({ 27 | chunkSize: 1000, 28 | chunkOverlap: 200, 29 | }); 30 | const docs = await splitter.splitDocuments(raw_docs); 31 | 32 | // embed each chunk and insert it into the vector store 33 | const model = new OpenAIEmbeddings(); 34 | const db = await PGVectorStore.fromDocuments(docs, model, { 35 | postgresConnectionOptions: { 36 | connectionString, 37 | }, 38 | }); 39 | 40 | console.log('Vector store created successfully'); 41 | 42 | const results = await db.similaritySearch('query', 4); 43 | 44 | console.log(`Similarity search results: ${JSON.stringify(results)}`); 45 | 46 | console.log('Adding documents to the vector store'); 47 | 48 | const ids = [uuidv4(), uuidv4()]; 49 | 50 | await db.addDocuments( 51 | [ 52 | { 53 | pageContent: 'there are cats in the pond', 54 | metadata: { location: 'pond', topic: 'animals' }, 55 | }, 56 | { 57 | pageContent: 'ducks are also found in the pond', 58 | metadata: { location: 'pond', topic: 'animals' }, 59 | }, 60 | ], 61 | { ids } 62 | ); 63 | 64 | console.log('Documents added successfully'); 65 | 66 | await db.delete({ ids: [ids[1]] }); 67 | 68 | console.log('second document deleted successfully'); 69 | -------------------------------------------------------------------------------- /ch2/js/j-record-manager.js: -------------------------------------------------------------------------------- 1 | /** 2 | 1. Ensure docker is installed and running (https://docs.docker.com/get-docker/) 3 | 2. Run the following command to start the postgres container: 4 | 5 | docker run \ 6 | --name pgvector-container \ 7 | -e POSTGRES_USER=langchain \ 8 | -e POSTGRES_PASSWORD=langchain \ 9 | -e POSTGRES_DB=langchain \ 10 | -p 6024:5432 \ 11 | -d pgvector/pgvector:pg16 12 | 3. 
Use the connection string below for the postgres container 13 | */ 14 | 15 | import { PostgresRecordManager } from '@langchain/community/indexes/postgres'; 16 | import { index } from 'langchain/indexes'; 17 | import { OpenAIEmbeddings } from '@langchain/openai'; 18 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 19 | import { v4 as uuidv4 } from 'uuid'; 20 | 21 | const tableName = 'test_langchain'; 22 | const connectionString = 23 | 'postgresql://langchain:langchain@localhost:6024/langchain'; 24 | // Load the document, split it into chunks 25 | 26 | const config = { 27 | postgresConnectionOptions: { 28 | connectionString, 29 | }, 30 | tableName: tableName, 31 | columns: { 32 | idColumnName: 'id', 33 | vectorColumnName: 'vector', 34 | contentColumnName: 'content', 35 | metadataColumnName: 'metadata', 36 | }, 37 | }; 38 | 39 | const vectorStore = await PGVectorStore.initialize( 40 | new OpenAIEmbeddings(), 41 | config 42 | ); 43 | 44 | // Create a new record manager 45 | const recordManagerConfig = { 46 | postgresConnectionOptions: { 47 | connectionString, 48 | }, 49 | tableName: 'upsertion_records', 50 | }; 51 | const recordManager = new PostgresRecordManager( 52 | 'test_namespace', 53 | recordManagerConfig 54 | ); 55 | 56 | // Create the schema if it doesn't exist 57 | await recordManager.createSchema(); 58 | 59 | const docs = [ 60 | { 61 | pageContent: 'there are cats in the pond', 62 | metadata: { id: uuidv4(), source: 'cats.txt' }, 63 | }, 64 | { 65 | pageContent: 'ducks are also found in the pond', 66 | metadata: { id: uuidv4(), source: 'ducks.txt' }, 67 | }, 68 | ]; 69 | 70 | // the first attempt will index both documents 71 | const index_attempt_1 = await index({ 72 | docsSource: docs, 73 | recordManager, 74 | vectorStore, 75 | options: { 76 | cleanup: 'incremental', // prevent duplicate documents by id from being indexed 77 | sourceIdKey: 'source', // the key in the metadata that will be used to identify the document 78 | }, 79 | }); 80 | 81 | console.log(index_attempt_1); 82 | 83 | // the second attempt will skip indexing because the identical documents already exist 84 | const index_attempt_2 = await index({ 85 | docsSource: docs, 86 | recordManager, 87 | vectorStore, 88 | options: { 89 | cleanup: 'incremental', 90 | sourceIdKey: 'source', 91 | }, 92 | }); 93 | 94 | console.log(index_attempt_2); 95 | 96 | // If we mutate a document, the new version will be written and all old versions sharing the same source will be deleted. 
97 | docs[0].pageContent = 'I modified the first document content'; 98 | const index_attempt_3 = await index({ 99 | docsSource: docs, 100 | recordManager, 101 | vectorStore, 102 | options: { 103 | cleanup: 'incremental', 104 | sourceIdKey: 'source', 105 | }, 106 | }); 107 | 108 | console.log(index_attempt_3); 109 | -------------------------------------------------------------------------------- /ch2/js/k-multi-vector-retriever.js: -------------------------------------------------------------------------------- 1 | import * as uuid from 'uuid'; 2 | import { MultiVectorRetriever } from 'langchain/retrievers/multi_vector'; 3 | import { OpenAIEmbeddings } from '@langchain/openai'; 4 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 5 | import { InMemoryStore } from '@langchain/core/stores'; 6 | import { TextLoader } from 'langchain/document_loaders/fs/text'; 7 | import { Document } from '@langchain/core/documents'; 8 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 9 | import { ChatOpenAI } from '@langchain/openai'; 10 | import { PromptTemplate } from '@langchain/core/prompts'; 11 | import { RunnableSequence } from '@langchain/core/runnables'; 12 | import { StringOutputParser } from '@langchain/core/output_parsers'; 13 | 14 | const connectionString = 15 | 'postgresql://langchain:langchain@localhost:6024/langchain'; 16 | const collectionName = 'summaries'; 17 | 18 | const textLoader = new TextLoader('./test.txt'); 19 | const parentDocuments = await textLoader.load(); 20 | const splitter = new RecursiveCharacterTextSplitter({ 21 | chunkSize: 10000, 22 | chunkOverlap: 20, 23 | }); 24 | const docs = await splitter.splitDocuments(parentDocuments); 25 | 26 | const prompt = PromptTemplate.fromTemplate( 27 | `Summarize the following document:\n\n{doc}` 28 | ); 29 | 30 | const llm = new ChatOpenAI({ modelName: 'gpt-3.5-turbo' }); 31 | 32 | const chain = RunnableSequence.from([ 33 | { doc: (doc) => doc.pageContent }, 34 | prompt, 35 | llm, 36 | new StringOutputParser(), 37 | ]); 38 | 39 | // batch summarization chain across the chunks 40 | const summaries = await chain.batch(docs, { 41 | maxConcurrency: 5, 42 | }); 43 | 44 | const idKey = 'doc_id'; 45 | const docIds = docs.map((_) => uuid.v4()); 46 | // create summary docs with metadata linking to the original docs 47 | const summaryDocs = summaries.map((summary, i) => { 48 | const summaryDoc = new Document({ 49 | pageContent: summary, 50 | metadata: { 51 | [idKey]: docIds[i], 52 | }, 53 | }); 54 | return summaryDoc; 55 | }); 56 | 57 | // The byteStore to use to store the original chunks 58 | const byteStore = new InMemoryStore(); 59 | 60 | // vector store for the summaries (index the summary docs so their doc_id metadata links back to the originals) 61 | const vectorStore = await PGVectorStore.fromDocuments( 62 | summaryDocs, 63 | new OpenAIEmbeddings(), 64 | { 65 | postgresConnectionOptions: { 66 | connectionString, 67 | }, 68 | } 69 | ); 70 | 71 | const retriever = new MultiVectorRetriever({ 72 | vectorstore: vectorStore, 73 | byteStore, 74 | idKey, 75 | }); 76 | 77 | const keyValuePairs = docs.map((originalDoc, i) => [docIds[i], originalDoc]); 78 | 79 | // Use the retriever to add the original chunks to the document store 80 | await retriever.docstore.mset(keyValuePairs); 81 | 82 | // Vectorstore alone retrieves the short summary documents 83 | const vectorstoreResult = await retriever.vectorstore.similaritySearch( 84 | 'chapter on philosophy', 85 | 2 86 | ); 87 | console.log(`summary: ${vectorstoreResult[0].pageContent}`); 88 | console.log( 89 | `summary retrieved length:
${vectorstoreResult[0].pageContent.length}` 90 | ); 91 | 92 | // Retriever returns larger chunk result 93 | const retrieverResult = await retriever.invoke('chapter on philosophy'); 94 | console.log( 95 | `multi-vector retrieved chunk length: ${retrieverResult[0].pageContent.length}` 96 | ); 97 | -------------------------------------------------------------------------------- /ch2/py/a-text-loader.py: -------------------------------------------------------------------------------- 1 | from langchain_community.document_loaders import TextLoader 2 | 3 | loader = TextLoader('./test.txt', encoding="utf-8") 4 | docs = loader.load() 5 | 6 | print(docs) 7 | -------------------------------------------------------------------------------- /ch2/py/b-web-loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Install the beautifulsoup4 package: 3 | 4 | ```bash 5 | pip install beautifulsoup4 6 | ``` 7 | """ 8 | 9 | from langchain_community.document_loaders import WebBaseLoader 10 | 11 | loader = WebBaseLoader('https://www.langchain.com/') 12 | docs = loader.load() 13 | 14 | print(docs) 15 | -------------------------------------------------------------------------------- /ch2/py/c-pdf-loader.py: -------------------------------------------------------------------------------- 1 | # install the pdf parsing library !pip install pypdf 2 | 3 | from langchain_community.document_loaders import PyPDFLoader 4 | 5 | loader = PyPDFLoader('./test.pdf') 6 | pages = loader.load() 7 | 8 | print(pages) 9 | -------------------------------------------------------------------------------- /ch2/py/d-rec-text-splitter.py: -------------------------------------------------------------------------------- 1 | from langchain_text_splitters import RecursiveCharacterTextSplitter 2 | 3 | from langchain_community.document_loaders import TextLoader 4 | 5 | loader = TextLoader('./test.txt', encoding="utf-8") 6 | docs = loader.load() 7 | 8 | splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 9 | splitted_docs = splitter.split_documents(docs) 10 | 11 | print(splitted_docs) 12 | -------------------------------------------------------------------------------- /ch2/py/e-rec-text-splitter-code.py: -------------------------------------------------------------------------------- 1 | from langchain_text_splitters import ( 2 | Language, 3 | RecursiveCharacterTextSplitter, 4 | ) 5 | 6 | PYTHON_CODE = """ def hello_world(): print("Hello, World!") # Call the function hello_world() """ 7 | 8 | python_splitter = RecursiveCharacterTextSplitter.from_language( 9 | language=Language.PYTHON, chunk_size=50, chunk_overlap=0 10 | ) 11 | 12 | python_docs = python_splitter.create_documents([PYTHON_CODE]) 13 | 14 | print(python_docs) 15 | -------------------------------------------------------------------------------- /ch2/py/f-markdown-splitter.py: -------------------------------------------------------------------------------- 1 | from langchain_text_splitters import ( 2 | Language, 3 | RecursiveCharacterTextSplitter, 4 | ) 5 | markdown_text = """ # 🦜🔗 LangChain ⚡ Building applications with LLMs through composability ⚡ ## Quick Install ```bash pip install langchain ``` As an open source project in a rapidly developing field, we are extremely open to contributions. 
""" 6 | 7 | md_splitter = RecursiveCharacterTextSplitter.from_language( 8 | language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0 9 | ) 10 | 11 | md_docs = md_splitter.create_documents( 12 | [markdown_text], [{"source": "https://www.langchain.com"}]) 13 | 14 | print(md_docs) 15 | -------------------------------------------------------------------------------- /ch2/py/g-embeddings.py: -------------------------------------------------------------------------------- 1 | from langchain_openai import OpenAIEmbeddings 2 | 3 | model = OpenAIEmbeddings(model="text-embedding-3-small") 4 | embeddings = model.embed_documents([ 5 | "Hi there!", 6 | "Oh, hello!", 7 | "What's your name?", 8 | "My friends call me World", 9 | "Hello World!" 10 | ]) 11 | 12 | print(embeddings) 13 | -------------------------------------------------------------------------------- /ch2/py/h-load-split-embed.py: -------------------------------------------------------------------------------- 1 | from langchain_community.document_loaders import TextLoader 2 | from langchain_text_splitters import RecursiveCharacterTextSplitter 3 | from langchain_openai import OpenAIEmbeddings 4 | 5 | # Load the document 6 | loader = TextLoader("./test.txt", encoding="utf-8") 7 | doc = loader.load() 8 | 9 | # Split the document 10 | splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 11 | chunks = splitter.split_documents(doc) 12 | 13 | # Generate embeddings 14 | embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small") 15 | embeddings = embeddings_model.embed_documents( 16 | [chunk.page_content for chunk in chunks] 17 | ) 18 | 19 | print(embeddings) 20 | -------------------------------------------------------------------------------- /ch2/py/i-pg-vector.py: -------------------------------------------------------------------------------- 1 | """ 2 | 1. Ensure docker is installed and running (https://docs.docker.com/get-docker/) 3 | 2. pip install -qU langchain_postgres 4 | 3. Run the following command to start the postgres container: 5 | 6 | docker run \ 7 | --name pgvector-container \ 8 | -e POSTGRES_USER=langchain \ 9 | -e POSTGRES_PASSWORD=langchain \ 10 | -e POSTGRES_DB=langchain \ 11 | -p 6024:5432 \ 12 | -d pgvector/pgvector:pg16 13 | 4. Use the connection string below for the postgres container 14 | 15 | """ 16 | 17 | from langchain_community.document_loaders import TextLoader 18 | from langchain_openai import OpenAIEmbeddings 19 | from langchain_text_splitters import RecursiveCharacterTextSplitter 20 | from langchain_postgres.vectorstores import PGVector 21 | from langchain_core.documents import Document 22 | import uuid 23 | 24 | 25 | # See docker command above to launch a postgres instance with pgvector enabled. 
26 | connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain" 27 | 28 | # Load the document, split it into chunks 29 | raw_documents = TextLoader('./test.txt', encoding="utf-8").load() 30 | text_splitter = RecursiveCharacterTextSplitter( 31 | chunk_size=1000, chunk_overlap=200) 32 | documents = text_splitter.split_documents(raw_documents) 33 | 34 | # Create embeddings for the documents 35 | embeddings_model = OpenAIEmbeddings() 36 | 37 | db = PGVector.from_documents( 38 | documents, embeddings_model, connection=connection) 39 | 40 | results = db.similarity_search("query", k=4) 41 | 42 | print(results) 43 | 44 | print("Adding documents to the vector store") 45 | ids = [str(uuid.uuid4()), str(uuid.uuid4())] 46 | db.add_documents( 47 | [ 48 | Document( 49 | page_content="there are cats in the pond", 50 | metadata={"location": "pond", "topic": "animals"}, 51 | ), 52 | Document( 53 | page_content="ducks are also found in the pond", 54 | metadata={"location": "pond", "topic": "animals"}, 55 | ), 56 | ], 57 | ids=ids, 58 | ) 59 | 60 | print("Documents added successfully.\n Fetched documents count:", 61 | len(db.get_by_ids(ids))) 62 | 63 | print("Deleting document with id", ids[1]) 64 | db.delete(ids=[ids[1]]) 65 | 66 | print("Document deleted successfully.\n Fetched documents count:", 67 | len(db.get_by_ids(ids))) 68 | -------------------------------------------------------------------------------- /ch2/py/j-record-manager.py: -------------------------------------------------------------------------------- 1 | from langchain.indexes import SQLRecordManager, index 2 | from langchain_postgres.vectorstores import PGVector 3 | from langchain_openai import OpenAIEmbeddings 4 | from langchain.docstore.document import Document 5 | 6 | connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain" 7 | collection_name = "my_docs" 8 | embeddings_model = OpenAIEmbeddings(model="text-embedding-3-small") 9 | namespace = "my_docs_namespace" 10 | 11 | vectorstore = PGVector( 12 | embeddings=embeddings_model, 13 | collection_name=collection_name, 14 | connection=connection, 15 | use_jsonb=True, 16 | ) 17 | 18 | record_manager = SQLRecordManager( 19 | namespace, 20 | db_url="postgresql+psycopg://langchain:langchain@localhost:6024/langchain", 21 | ) 22 | 23 | # Create the schema if it doesn't exist 24 | record_manager.create_schema() 25 | 26 | # Create documents 27 | docs = [ 28 | Document(page_content='there are cats in the pond', metadata={ 29 | "id": 1, "source": "cats.txt"}), 30 | Document(page_content='ducks are also found in the pond', metadata={ 31 | "id": 2, "source": "ducks.txt"}), 32 | ] 33 | 34 | # Index the documents 35 | index_1 = index( 36 | docs, 37 | record_manager, 38 | vectorstore, 39 | cleanup="incremental", # prevent duplicate documents 40 | source_id_key="source", # use the source field as the source_id 41 | ) 42 | 43 | print("Index attempt 1:", index_1) 44 | 45 | # second time you attempt to index, it will not add the documents again 46 | index_2 = index( 47 | docs, 48 | record_manager, 49 | vectorstore, 50 | cleanup="incremental", 51 | source_id_key="source", 52 | ) 53 | 54 | print("Index attempt 2:", index_2) 55 | 56 | # If we mutate a document, the new version will be written and all old versions sharing the same source will be deleted. 57 | 58 | docs[0].page_content = "I just modified this document!"
59 | 60 | index_3 = index( 61 | docs, 62 | record_manager, 63 | vectorstore, 64 | cleanup="incremental", 65 | source_id_key="source", 66 | ) 67 | 68 | print("Index attempt 3:", index_3) 69 | -------------------------------------------------------------------------------- /ch2/py/k-multi-vector-retriever.py: -------------------------------------------------------------------------------- 1 | from langchain_community.document_loaders import TextLoader 2 | from langchain_text_splitters import RecursiveCharacterTextSplitter 3 | from langchain_openai import OpenAIEmbeddings 4 | from langchain_postgres.vectorstores import PGVector 5 | from langchain_core.output_parsers import StrOutputParser 6 | from langchain_core.prompts import ChatPromptTemplate 7 | from pydantic import BaseModel 8 | from langchain_core.runnables import RunnablePassthrough 9 | from langchain_openai import ChatOpenAI 10 | from langchain_core.documents import Document 11 | from langchain.retrievers.multi_vector import MultiVectorRetriever 12 | from langchain.storage import InMemoryStore 13 | import uuid 14 | 15 | connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain" 16 | collection_name = "summaries" 17 | embeddings_model = OpenAIEmbeddings() 18 | # Load the document 19 | loader = TextLoader("./test.txt", encoding="utf-8") 20 | docs = loader.load() 21 | 22 | print("length of loaded docs: ", len(docs[0].page_content)) 23 | # Split the document 24 | splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 25 | chunks = splitter.split_documents(docs) 26 | 27 | # The rest of your code remains the same, starting from: 28 | prompt_text = "Summarize the following document:\n\n{doc}" 29 | 30 | prompt = ChatPromptTemplate.from_template(prompt_text) 31 | llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo") 32 | summarize_chain = { 33 | "doc": lambda x: x.page_content} | prompt | llm | StrOutputParser() 34 | 35 | # batch the chain across the chunks 36 | summaries = summarize_chain.batch(chunks, {"max_concurrency": 5}) 37 | 38 | # The vectorstore to use to index the child chunks 39 | vectorstore = PGVector( 40 | embeddings=embeddings_model, 41 | collection_name=collection_name, 42 | connection=connection, 43 | use_jsonb=True, 44 | ) 45 | # The storage layer for the parent documents 46 | store = InMemoryStore() 47 | id_key = "doc_id" 48 | 49 | # indexing the summaries in our vector store, whilst retaining the original documents in our document store: 50 | retriever = MultiVectorRetriever( 51 | vectorstore=vectorstore, 52 | docstore=store, 53 | id_key=id_key, 54 | ) 55 | 56 | # Changed from summaries to chunks since we need same length as docs 57 | doc_ids = [str(uuid.uuid4()) for _ in chunks] 58 | 59 | # Each summary is linked to the original document by the doc_id 60 | summary_docs = [ 61 | Document(page_content=s, metadata={id_key: doc_ids[i]}) 62 | for i, s in enumerate(summaries) 63 | ] 64 | 65 | # Add the document summaries to the vector store for similarity search 66 | retriever.vectorstore.add_documents(summary_docs) 67 | 68 | # Store the original documents in the document store, linked to their summaries via doc_ids 69 | # This allows us to first search summaries efficiently, then fetch the full docs when needed 70 | retriever.docstore.mset(list(zip(doc_ids, chunks))) 71 | 72 | # vector store retrieves the summaries 73 | sub_docs = retriever.vectorstore.similarity_search( 74 | "chapter on philosophy", k=2) 75 | 76 | print("sub docs: ", sub_docs[0].page_content) 77 | 78 | 
print("length of sub docs:\n", len(sub_docs[0].page_content)) 79 | 80 | # Whereas the retriever will return the larger source document chunks: 81 | retrieved_docs = retriever.invoke("chapter on philosophy") 82 | 83 | print("length of retrieved docs: ", len(retrieved_docs[0].page_content)) 84 | -------------------------------------------------------------------------------- /ch2/py/l-rag-colbert.py: -------------------------------------------------------------------------------- 1 | """ 2 | - Windows is not supported. RAGatouille doesn't appear to work outside WSL and has issues with WSL1. Some users have had success running RAGatouille in WSL2. 3 | - Only on python. 4 | - Read full docs here: https://github.com/AnswerDotAI/RAGatouille/blob/8183aad64a9a6ba805d4066dcab489d97615d316/README.md 5 | 6 | - To install run: 7 | 8 | ```bash 9 | pip install -U ragatouille transformers 10 | ``` 11 | """ 12 | from ragatouille import RAGPretrainedModel 13 | import requests 14 | 15 | RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0") 16 | 17 | 18 | def get_wikipedia_page(title: str): 19 | """ 20 | Retrieve the full text content of a Wikipedia page. 21 | :param title: str - Title of the Wikipedia page. 22 | :return: str - Full text content of the page as raw string. 23 | """ 24 | # Wikipedia API endpoint 25 | URL = "https://en.wikipedia.org/w/api.php" 26 | # Parameters for the API request 27 | params = { 28 | "action": "query", 29 | "format": "json", 30 | "titles": title, 31 | "prop": "extracts", 32 | "explaintext": True, 33 | } 34 | # Custom User-Agent header to comply with Wikipedia's best practices 35 | headers = {"User-Agent": "RAGatouille_tutorial/0.0.1"} 36 | response = requests.get(URL, params=params, headers=headers) 37 | data = response.json() 38 | # Extracting page content 39 | page = next(iter(data["query"]["pages"].values())) 40 | return page["extract"] if "extract" in page else None 41 | 42 | 43 | full_document = get_wikipedia_page("Hayao_Miyazaki") 44 | # Create an index 45 | RAG.index( 46 | collection=[full_document], 47 | index_name="Miyazaki-123", 48 | max_document_length=180, 49 | split_documents=True, 50 | ) 51 | # query 52 | results = RAG.search(query="What animation studio did Miyazaki found?", k=3) 53 | 54 | print(results) 55 | 56 | # Alternative: Utilize langchain retriever 57 | retriever = RAG.as_langchain_retriever(k=3) 58 | retriever.invoke("What animation studio did Miyazaki found?") 59 | -------------------------------------------------------------------------------- /ch3/js/a-basic-rag.js: -------------------------------------------------------------------------------- 1 | /** 2 | 1. Ensure docker is installed and running (https://docs.docker.com/get-docker/) 3 | 2. Run the following command to start the postgres container: 4 | 5 | docker run \ 6 | --name pgvector-container \ 7 | -e POSTGRES_USER=langchain \ 8 | -e POSTGRES_PASSWORD=langchain \ 9 | -e POSTGRES_DB=langchain \ 10 | -p 6024:5432 \ 11 | -d pgvector/pgvector:pg16 12 | 3. 
Use the connection string below for the postgres container 13 | */ 14 | 15 | import { TextLoader } from 'langchain/document_loaders/fs/text'; 16 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 17 | import { OpenAIEmbeddings } from '@langchain/openai'; 18 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 19 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 20 | import { ChatOpenAI } from '@langchain/openai'; 21 | import { RunnableLambda } from '@langchain/core/runnables'; 22 | const connectionString = 23 | 'postgresql://langchain:langchain@localhost:6024/langchain'; 24 | // Load the document, split it into chunks 25 | const loader = new TextLoader('./test.txt'); 26 | const raw_docs = await loader.load(); 27 | const splitter = new RecursiveCharacterTextSplitter({ 28 | chunkSize: 1000, 29 | chunkOverlap: 200, 30 | }); 31 | const splitDocs = await splitter.splitDocuments(raw_docs); 32 | 33 | // embed each chunk and insert it into the vector store 34 | const model = new OpenAIEmbeddings(); 35 | 36 | const db = await PGVectorStore.fromDocuments(splitDocs, model, { 37 | postgresConnectionOptions: { 38 | connectionString, 39 | }, 40 | }); 41 | 42 | // retrieve 2 relevant documents from the vector store 43 | const retriever = db.asRetriever({ k: 2 }); 44 | 45 | const query = 46 | 'Who are the key figures in the ancient greek history of philosophy?'; 47 | 48 | // fetch relevant documents 49 | const docs = await retriever.invoke(query); 50 | 51 | console.log( 52 | `fetched document based on similarity search query:\n ${docs[0].pageContent}\n\n` 53 | ); 54 | 55 | /** 56 | * Provide retrieved docs as context to the LLM to answer a user's question 57 | */ 58 | const prompt = ChatPromptTemplate.fromTemplate( 59 | 'Answer the question based only on the following context:\n {context}\n\nQuestion: {question}' 60 | ); 61 | 62 | const llm = new ChatOpenAI({ temperature: 0, modelName: 'gpt-3.5-turbo' }); 63 | const chain = prompt.pipe(llm); 64 | 65 | const result = await chain.invoke({ 66 | context: docs, 67 | question: query, 68 | }); 69 | 70 | console.log(result); 71 | console.log('\n\n'); 72 | 73 | // run again but this time encapsulate the logic for efficiency 74 | 75 | console.log( 76 | 'Running again but this time encapsulate the logic for efficiency\n' 77 | ); 78 | const qa = RunnableLambda.from(async (input) => { 79 | // fetch relevant documents 80 | const docs = await retriever.invoke(input); 81 | // format prompt 82 | const formatted = await prompt.invoke({ context: docs, question: input }); 83 | // generate answer 84 | const answer = await llm.invoke(formatted); 85 | return answer; 86 | }); 87 | 88 | const finalResult = await qa.invoke(query); 89 | console.log(finalResult); 90 | -------------------------------------------------------------------------------- /ch3/js/b-rewrite.js: -------------------------------------------------------------------------------- 1 | /** 2 | 1. Ensure docker is installed and running (https://docs.docker.com/get-docker/) 3 | 2. Run the following command to start the postgres container: 4 | 5 | docker run \ 6 | --name pgvector-container \ 7 | -e POSTGRES_USER=langchain \ 8 | -e POSTGRES_PASSWORD=langchain \ 9 | -e POSTGRES_DB=langchain \ 10 | -p 6024:5432 \ 11 | -d pgvector/pgvector:pg16 12 | 3. 
Use the connection string below for the postgres container 13 | */ 14 | 15 | import { TextLoader } from 'langchain/document_loaders/fs/text'; 16 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 17 | import { OpenAIEmbeddings } from '@langchain/openai'; 18 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 19 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 20 | import { ChatOpenAI } from '@langchain/openai'; 21 | import { RunnableLambda } from '@langchain/core/runnables'; 22 | const connectionString = 23 | 'postgresql://langchain:langchain@localhost:6024/langchain'; 24 | // Load the document, split it into chunks 25 | const loader = new TextLoader('./test.txt'); 26 | const raw_docs = await loader.load(); 27 | const splitter = new RecursiveCharacterTextSplitter({ 28 | chunkSize: 1000, 29 | chunkOverlap: 200, 30 | }); 31 | const splitDocs = await splitter.splitDocuments(raw_docs); 32 | 33 | // embed each chunk and insert it into the vector store 34 | const model = new OpenAIEmbeddings(); 35 | 36 | const db = await PGVectorStore.fromDocuments(splitDocs, model, { 37 | postgresConnectionOptions: { 38 | connectionString, 39 | }, 40 | }); 41 | 42 | // retrieve 2 relevant documents from the vector store 43 | const retriever = db.asRetriever({ k: 2 }); 44 | 45 | /** 46 | * Query starts with irrelevant information before asking the relevant question 47 | */ 48 | const query = 49 | 'Today I woke up and brushed my teeth, then I sat down to read the news. But then I forgot the food on the cooker. Who are some key figures in the ancient greek history of philosophy?'; 50 | /** 51 | * Provide retrieved docs as context to the LLM to answer a user's question 52 | */ 53 | const prompt = ChatPromptTemplate.fromTemplate( 54 | 'Answer the question based only on the following context:\n {context}\n\nQuestion: {question}' 55 | ); 56 | 57 | const llm = new ChatOpenAI({ temperature: 0, modelName: 'gpt-3.5-turbo' }); 58 | 59 | const qa = RunnableLambda.from(async (input) => { 60 | // fetch relevant documents 61 | const docs = await retriever.invoke(input); 62 | // format prompt 63 | const formatted = await prompt.invoke({ context: docs, question: input }); 64 | // generate answer 65 | const answer = await llm.invoke(formatted); 66 | return { answer, docs }; 67 | }); 68 | 69 | const result = await qa.invoke(query); 70 | console.log(result); 71 | console.log('\n\nCall model again with rewritten query\n\n'); 72 | 73 | const rewritePrompt = ChatPromptTemplate.fromTemplate( 74 | `Provide a better search query for web search engine to answer the given question, end the queries with '**'. 
Question: {question} Answer:` 75 | ); 76 | const rewriter = rewritePrompt.pipe(llm).pipe((message) => { 77 | return message.content.replaceAll('"', '').replaceAll('**', ''); 78 | }); 79 | const rewriterQA = RunnableLambda.from(async (input) => { 80 | const newQuery = await rewriter.invoke({ question: input }); console.log('New query: ', newQuery); // fetch relevant documents 81 | const docs = await retriever.invoke(newQuery); // format prompt 82 | const formatted = await prompt.invoke({ context: docs, question: input }); // generate answer 83 | const answer = await llm.invoke(formatted); 84 | return answer; 85 | }); 86 | 87 | const finalResult = await rewriterQA.invoke(query); 88 | console.log(finalResult); 89 | -------------------------------------------------------------------------------- /ch3/js/c-multi-query.js: -------------------------------------------------------------------------------- 1 | /** 2 | 1. Ensure docker is installed and running (https://docs.docker.com/get-docker/) 3 | 2. Run the following command to start the postgres container: 4 | 5 | docker run \ 6 | --name pgvector-container \ 7 | -e POSTGRES_USER=langchain \ 8 | -e POSTGRES_PASSWORD=langchain \ 9 | -e POSTGRES_DB=langchain \ 10 | -p 6024:5432 \ 11 | -d pgvector/pgvector:pg16 12 | 3. Use the connection string below for the postgres container 13 | */ 14 | 15 | import { TextLoader } from 'langchain/document_loaders/fs/text'; 16 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 17 | import { OpenAIEmbeddings } from '@langchain/openai'; 18 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 19 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 20 | import { ChatOpenAI } from '@langchain/openai'; 21 | import { RunnableLambda } from '@langchain/core/runnables'; 22 | 23 | const connectionString = 24 | 'postgresql://langchain:langchain@localhost:6024/langchain'; 25 | // Load the document, split it into chunks 26 | const loader = new TextLoader('./test.txt'); 27 | const raw_docs = await loader.load(); 28 | const splitter = new RecursiveCharacterTextSplitter({ 29 | chunkSize: 1000, 30 | chunkOverlap: 200, 31 | }); 32 | const splitDocs = await splitter.splitDocuments(raw_docs); 33 | 34 | // embed each chunk and insert it into the vector store 35 | const model = new OpenAIEmbeddings(); 36 | 37 | const db = await PGVectorStore.fromDocuments(splitDocs, model, { 38 | postgresConnectionOptions: { 39 | connectionString, 40 | }, 41 | }); 42 | 43 | // retrieve 2 relevant documents from the vector store 44 | const retriever = db.asRetriever({ k: 2 }); 45 | /** 46 | * Provide retrieved docs as context to the LLM to answer a user's question 47 | */ 48 | const llm = new ChatOpenAI({ temperature: 0, modelName: 'gpt-3.5-turbo' }); 49 | 50 | const perspectivesPrompt = ChatPromptTemplate.fromTemplate( 51 | `You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. Provide these alternative questions separated by newlines.
Original question: {question}` 52 | ); 53 | 54 | const queryGen = perspectivesPrompt.pipe(llm).pipe((message) => { 55 | return message.content.split('\n'); 56 | }); 57 | 58 | /** 59 | * This chain retrieves and combines the documents from the vector store for each query 60 | */ 61 | const retrievalChain = queryGen 62 | .pipe(retriever.batch.bind(retriever)) 63 | .pipe((documentLists) => { 64 | const dedupedDocs = {}; 65 | documentLists.flat().forEach((doc) => { 66 | dedupedDocs[doc.pageContent] = doc; 67 | }); 68 | return Object.values(dedupedDocs); 69 | }); 70 | 71 | const prompt = ChatPromptTemplate.fromTemplate( 72 | 'Answer the question based only on the following context:\n {context}\n\nQuestion: {question}' 73 | ); 74 | 75 | console.log('Running multi query qa\n'); 76 | const multiQueryQa = RunnableLambda.from(async (input) => { 77 | // fetch relevant documents 78 | const docs = await retrievalChain.invoke({ question: input }); 79 | // format prompt 80 | const formatted = await prompt.invoke({ context: docs, question: input }); 81 | // generate answer 82 | const answer = await llm.invoke(formatted); 83 | return answer; 84 | }); 85 | 86 | const result = await multiQueryQa.invoke( 87 | 'Who are the key figures in the ancient greek history of philosophy?' 88 | ); 89 | 90 | console.log(result); 91 | -------------------------------------------------------------------------------- /ch3/js/e-hyde.js: -------------------------------------------------------------------------------- 1 | /** 2 | 1. Ensure docker is installed and running (https://docs.docker.com/get-docker/) 3 | 2. Run the following command to start the postgres container: 4 | 5 | docker run \ 6 | --name pgvector-container \ 7 | -e POSTGRES_USER=langchain \ 8 | -e POSTGRES_PASSWORD=langchain \ 9 | -e POSTGRES_DB=langchain \ 10 | -p 6024:5432 \ 11 | -d pgvector/pgvector:pg16 12 | 3. 
Use the connection string below for the postgres container 13 | */ 14 | 15 | import { TextLoader } from 'langchain/document_loaders/fs/text'; 16 | import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'; 17 | import { OpenAIEmbeddings } from '@langchain/openai'; 18 | import { PGVectorStore } from '@langchain/community/vectorstores/pgvector'; 19 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 20 | import { ChatOpenAI } from '@langchain/openai'; 21 | import { RunnableLambda } from '@langchain/core/runnables'; 22 | 23 | const connectionString = 24 | 'postgresql://langchain:langchain@localhost:6024/langchain'; 25 | // Load the document, split it into chunks 26 | const loader = new TextLoader('./test.txt'); 27 | const raw_docs = await loader.load(); 28 | const splitter = new RecursiveCharacterTextSplitter({ 29 | chunkSize: 1000, 30 | chunkOverlap: 200, 31 | }); 32 | const splitDocs = await splitter.splitDocuments(raw_docs); 33 | 34 | // embed each chunk and insert it into the vector store 35 | const model = new OpenAIEmbeddings(); 36 | 37 | const db = await PGVectorStore.fromDocuments(splitDocs, model, { 38 | postgresConnectionOptions: { 39 | connectionString, 40 | }, 41 | }); 42 | 43 | // retrieve 2 relevant documents from the vector store 44 | const retriever = db.asRetriever({ k: 2 }); 45 | /** 46 | * Provide retrieved docs as context to the LLM to answer a user's question 47 | */ 48 | const llm = new ChatOpenAI({ temperature: 0, modelName: 'gpt-3.5-turbo' }); 49 | 50 | const hydePrompt = ChatPromptTemplate.fromTemplate( 51 | `Please write a passage to answer the question.\n Question: {question} \n Passage:` 52 | ); 53 | 54 | const generatedDoc = hydePrompt.pipe(llm).pipe((msg) => msg.content); 55 | 56 | /** 57 | * This chain retrieves and combines the documents from the vector store for each query 58 | */ 59 | const retrievalChain = generatedDoc.pipe(retriever); 60 | 61 | const prompt = ChatPromptTemplate.fromTemplate( 62 | 'Answer the question based only on the following context:\n {context}\n\nQuestion: {question}' 63 | ); 64 | 65 | console.log('Running hyde\n'); 66 | const hydeQa = RunnableLambda.from(async (input) => { 67 | // fetch relevant documents 68 | const docs = await retrievalChain.invoke(input); 69 | // format prompt 70 | const formatted = await prompt.invoke({ context: docs, question: input }); 71 | // generate answer 72 | const answer = await llm.invoke(formatted); 73 | return answer; 74 | }); 75 | 76 | const result = await hydeQa.invoke( 77 | 'Who are some lesser known philosophers in the ancient greek history of philosophy?' 
78 | ); 79 | 80 | console.log(result); 81 | -------------------------------------------------------------------------------- /ch3/js/f-router.js: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from '@langchain/openai'; 2 | import { z } from 'zod'; 3 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 4 | 5 | const routeQuery = z 6 | .object({ 7 | datasource: z 8 | .enum(['python_docs', 'js_docs']) 9 | .describe( 10 | 'Given a user question, choose which datasource would be most relevant for answering their question' 11 | ), 12 | }) 13 | .describe('Route a user query to the most relevant datasource.'); 14 | 15 | const llm = new ChatOpenAI({ model: 'gpt-3.5-turbo', temperature: 0 }); 16 | // withStructuredOutput is a method that allows us to use the structured output of the model 17 | const structuredLlm = llm.withStructuredOutput(routeQuery, { 18 | name: 'RouteQuery', 19 | }); 20 | 21 | const prompt = ChatPromptTemplate.fromMessages([ 22 | [ 23 | 'system', 24 | `You are an expert at routing a user question to the appropriate data source. Based on the programming language the question is referring to, route it to the relevant data source.`, 25 | ], 26 | ['human', '{question}'], 27 | ]); 28 | 29 | const router = prompt.pipe(structuredLlm); 30 | 31 | const question = `Why doesn't the following code work: 32 | from langchain_core.prompts 33 | import ChatPromptTemplate 34 | prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"]) 35 | prompt.invoke("french") `; 36 | 37 | const result = await router.invoke({ question }); 38 | 39 | console.log('Routing to: ', result); 40 | 41 | /** Once we’ve extracted the relevant data source, we can pass the value into another function to execute additional logic as required: */ 42 | 43 | const chooseRoute = (result) => { 44 | if (result.datasource.toLowerCase().includes('python_docs')) { 45 | return 'chain for python_docs'; 46 | } else { 47 | return 'chain for js_docs'; 48 | } 49 | }; 50 | 51 | const fullChain = router.pipe(chooseRoute); 52 | 53 | const finalResult = await fullChain.invoke({ question }); 54 | 55 | console.log('Choose route: ', finalResult); 56 | -------------------------------------------------------------------------------- /ch3/js/g-semantic-router.js: -------------------------------------------------------------------------------- 1 | import { cosineSimilarity } from '@langchain/core/utils/math'; 2 | import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai'; 3 | import { PromptTemplate } from '@langchain/core/prompts'; 4 | import { RunnableLambda } from '@langchain/core/runnables'; 5 | 6 | const physicsTemplate = `You are a very smart physics professor. You are great at answering questions about physics in a concise and easy-to-understand manner. When you don't know the answer to a question, you admit that you don't know. Here is a question: {query}`; 7 | 8 | const mathTemplate = `You are a very good mathematician. You are great at answering math questions. You are so good because you are able to break down hard problems into their component parts, answer the component parts, and then put them together to answer the broader question. 
Here is a question: {query}`; 9 | 10 | const embeddings = new OpenAIEmbeddings(); 11 | 12 | const promptTemplates = [physicsTemplate, mathTemplate]; 13 | 14 | const promptEmbeddings = await embeddings.embedDocuments(promptTemplates); 15 | 16 | const promptRouter = RunnableLambda.from(async (query) => { 17 | // Embed question 18 | const queryEmbedding = await embeddings.embedQuery(query); 19 | // Compute similarity 20 | const similarities = cosineSimilarity([queryEmbedding], promptEmbeddings)[0]; 21 | // Pick the prompt most similar to the input question 22 | const mostSimilar = 23 | similarities[0] > similarities[1] ? promptTemplates[0] : promptTemplates[1]; 24 | console.log( 25 | `Using ${mostSimilar === promptTemplates[0] ? 'PHYSICS' : 'MATH'}` 26 | ); 27 | return PromptTemplate.fromTemplate(mostSimilar).invoke({ query }); 28 | }); 29 | 30 | const semanticRouter = promptRouter.pipe( 31 | new ChatOpenAI({ modelName: 'gpt-3.5-turbo', temperature: 0 }) 32 | ); 33 | 34 | const result = await semanticRouter.invoke('What is a black hole'); 35 | console.log('\nSemantic router result: ', result); 36 | -------------------------------------------------------------------------------- /ch3/js/i-sql-example.js: -------------------------------------------------------------------------------- 1 | /* 2 | The below example will use a SQLite connection with the Chinook database, which is a sample database that represents a digital media store. Follow these installation steps to create Chinook.db in the same directory as this notebook. You can also download and build the database via the command line: 3 | 4 | ```bash 5 | curl -s https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sql | sqlite3 Chinook.db 6 | 7 | ``` 8 | 9 | Afterwards, place `Chinook.db` in the same directory where this code is running. 10 | 11 | */ 12 | 13 | import { ChatOpenAI } from '@langchain/openai'; 14 | import { createSqlQueryChain } from 'langchain/chains/sql_db'; 15 | import { SqlDatabase } from 'langchain/sql_db'; 16 | import { DataSource } from 'typeorm'; 17 | import { QuerySqlTool } from 'langchain/tools/sql'; 18 | 19 | const datasource = new DataSource({ 20 | type: 'sqlite', 21 | database: 'Chinook.db', //this should be the path to the db 22 | }); 23 | const db = await SqlDatabase.fromDataSourceParams({ 24 | appDataSource: datasource, 25 | }); 26 | //test that the db is working 27 | await db.run('SELECT * FROM Artist LIMIT 10;'); 28 | 29 | const llm = new ChatOpenAI({ modelName: 'gpt-4o', temperature: 0 }); 30 | // convert question to sql query 31 | const writeQuery = await createSqlQueryChain({ llm, db, dialect: 'sqlite' }); 32 | // execute query 33 | const executeQuery = new QuerySqlTool(db); 34 | // combined 35 | const chain = writeQuery.pipe(executeQuery); 36 | 37 | const result = await chain.invoke({ 38 | question: 'How many employees are there?', 39 | }); 40 | console.log(result); 41 | -------------------------------------------------------------------------------- /ch3/py/a-basic-rag.py: -------------------------------------------------------------------------------- 1 | """ 2 | 1. Ensure docker is installed and running (https://docs.docker.com/get-docker/) 3 | 2. pip install -qU langchain_postgres 4 | 3. 
Run the following command to start the postgres container: 5 | 6 | docker run \ 7 | --name pgvector-container \ 8 | -e POSTGRES_USER=langchain \ 9 | -e POSTGRES_PASSWORD=langchain \ 10 | -e POSTGRES_DB=langchain \ 11 | -p 6024:5432 \ 12 | -d pgvector/pgvector:pg16 13 | 4. Use the connection string below for the postgres container 14 | 15 | """ 16 | 17 | from langchain_community.document_loaders import TextLoader 18 | from langchain_openai import OpenAIEmbeddings 19 | from langchain_text_splitters import RecursiveCharacterTextSplitter 20 | from langchain_postgres.vectorstores import PGVector 21 | from langchain_openai import ChatOpenAI 22 | from langchain_core.prompts import ChatPromptTemplate 23 | from langchain_core.runnables import chain 24 | 25 | 26 | # See docker command above to launch a postgres instance with pgvector enabled. 27 | connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain" 28 | 29 | # Load the document, split it into chunks 30 | raw_documents = TextLoader('./test.txt', encoding='utf-8').load() 31 | text_splitter = RecursiveCharacterTextSplitter( 32 | chunk_size=1000, chunk_overlap=200) 33 | documents = text_splitter.split_documents(raw_documents) 34 | 35 | # Create embeddings for the documents 36 | embeddings_model = OpenAIEmbeddings() 37 | 38 | db = PGVector.from_documents( 39 | documents, embeddings_model, connection=connection) 40 | 41 | # create retriever to retrieve 2 relevant documents 42 | retriever = db.as_retriever(search_kwargs={"k": 2}) 43 | 44 | query = 'Who are the key figures in the ancient greek history of philosophy?' 45 | 46 | # fetch relevant documents 47 | docs = retriever.invoke(query) 48 | 49 | print(docs[0].page_content) 50 | 51 | prompt = ChatPromptTemplate.from_template( 52 | """Answer the question based only on the following context: {context} Question: {question} """ 53 | ) 54 | llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) 55 | llm_chain = prompt | llm 56 | 57 | # answer the question based on relevant documents 58 | result = llm_chain.invoke({"context": docs, "question": query}) 59 | 60 | print(result) 61 | print("\n\n") 62 | 63 | # Run again but this time encapsulate the logic for efficiency 64 | 65 | # @chain decorator transforms this function into a LangChain runnable, 66 | # making it compatible with LangChain's chain operations and pipeline 67 | 68 | print("Running again but this time encapsulate the logic for efficiency\n") 69 | 70 | 71 | @chain 72 | def qa(input): 73 | # fetch relevant documents 74 | docs = retriever.invoke(input) 75 | # format prompt 76 | formatted = prompt.invoke({"context": docs, "question": input}) 77 | # generate answer 78 | answer = llm.invoke(formatted) 79 | return answer 80 | 81 | 82 | # run it 83 | result = qa.invoke(query) 84 | print(result.content) 85 | -------------------------------------------------------------------------------- /ch3/py/c-multi-query.py: -------------------------------------------------------------------------------- 1 | from langchain_community.document_loaders import TextLoader 2 | from langchain_openai import OpenAIEmbeddings 3 | from langchain_text_splitters import RecursiveCharacterTextSplitter 4 | from langchain_postgres.vectorstores import PGVector 5 | from langchain_openai import ChatOpenAI 6 | from langchain_core.prompts import ChatPromptTemplate 7 | from langchain_core.runnables import chain 8 | 9 | 10 | # See docker command above to launch a postgres instance with pgvector enabled. 
11 | connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain" 12 | 13 | # Load the document, split it into chunks 14 | raw_documents = TextLoader('./test.txt', encoding='utf-8').load() 15 | text_splitter = RecursiveCharacterTextSplitter( 16 | chunk_size=1000, chunk_overlap=200) 17 | documents = text_splitter.split_documents(raw_documents) 18 | 19 | # Create embeddings for the documents 20 | embeddings_model = OpenAIEmbeddings() 21 | 22 | db = PGVector.from_documents( 23 | documents, embeddings_model, connection=connection) 24 | 25 | # create retriever to retrieve 2 relevant documents 26 | retriever = db.as_retriever(search_kwargs={"k": 5}) 27 | 28 | # instruction to generate multiple queries 29 | perspectives_prompt = ChatPromptTemplate.from_template( 30 | """You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relevant documents from a vector database. 31 | By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. 32 | Provide these alternative questions separated by newlines. 33 | Original question: {question}""") 34 | 35 | llm = ChatOpenAI(model="gpt-3.5-turbo") 36 | 37 | 38 | def parse_queries_output(message): 39 | return message.content.split('\n') 40 | 41 | 42 | query_gen = perspectives_prompt | llm | parse_queries_output 43 | 44 | 45 | def get_unique_union(document_lists): 46 | # Flatten list of lists, and dedupe them 47 | deduped_docs = { 48 | doc.page_content: doc for sublist in document_lists for doc in sublist} 49 | # return a flat list of unique docs 50 | return list(deduped_docs.values()) 51 | 52 | 53 | retrieval_chain = query_gen | retriever.batch | get_unique_union 54 | 55 | prompt = ChatPromptTemplate.from_template( 56 | """Answer the question based only on the following context: {context} Question: {question} """ 57 | ) 58 | 59 | query = "Who are the key figures in the ancient greek history of philosophy?" 60 | 61 | 62 | @chain 63 | def multi_query_qa(input): 64 | # fetch relevant documents 65 | docs = retrieval_chain.invoke(input) # format prompt 66 | formatted = prompt.invoke( 67 | {"context": docs, "question": input}) # generate answer 68 | answer = llm.invoke(formatted) 69 | return answer 70 | 71 | 72 | # run 73 | print("Running multi query qa\n") 74 | result = multi_query_qa.invoke(query) 75 | print(result.content) 76 | -------------------------------------------------------------------------------- /ch3/py/e-hyde.py: -------------------------------------------------------------------------------- 1 | from langchain_community.document_loaders import TextLoader 2 | from langchain_openai import OpenAIEmbeddings 3 | from langchain_text_splitters import RecursiveCharacterTextSplitter 4 | from langchain_postgres.vectorstores import PGVector 5 | from langchain_openai import ChatOpenAI 6 | from langchain_core.prompts import ChatPromptTemplate 7 | from langchain_core.runnables import chain 8 | from langchain_core.output_parsers import StrOutputParser 9 | 10 | # See docker command above to launch a postgres instance with pgvector enabled. 
11 | connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain" 12 | 13 | # Load the document, split it into chunks 14 | raw_documents = TextLoader('./test.txt', encoding='utf-8').load() 15 | text_splitter = RecursiveCharacterTextSplitter( 16 | chunk_size=1000, chunk_overlap=200) 17 | documents = text_splitter.split_documents(raw_documents) 18 | 19 | # Create embeddings for the documents 20 | embeddings_model = OpenAIEmbeddings() 21 | 22 | db = PGVector.from_documents( 23 | documents, embeddings_model, connection=connection) 24 | 25 | # create retriever to retrieve 2 relevant documents 26 | retriever = db.as_retriever(search_kwargs={"k": 5}) 27 | 28 | prompt_hyde = ChatPromptTemplate.from_template( 29 | """Please write a passage to answer the question.\n Question: {question} \n Passage:""") 30 | 31 | generate_doc = (prompt_hyde | ChatOpenAI(temperature=0) | StrOutputParser()) 32 | 33 | """ 34 | Next, we take the hypothetical document generated above and use it as input to the retriever, 35 | which will generate its embedding and search for similar documents in the vector store: 36 | """ 37 | retrieval_chain = generate_doc | retriever 38 | 39 | query = "Who are some lesser known philosophers in the ancient greek history of philosophy?" 40 | 41 | prompt = ChatPromptTemplate.from_template( 42 | """Answer the question based only on the following context: {context} Question: {question} """ 43 | ) 44 | 45 | llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0) 46 | 47 | 48 | @chain 49 | def qa(input): 50 | # fetch relevant documents from the hyde retrieval chain defined earlier 51 | docs = retrieval_chain.invoke(input) 52 | # format prompt 53 | formatted = prompt.invoke({"context": docs, "question": input}) 54 | # generate answer 55 | answer = llm.invoke(formatted) 56 | return answer 57 | 58 | 59 | print("Running hyde\n") 60 | result = qa.invoke(query) 61 | print("\n\n") 62 | print(result.content) 63 | -------------------------------------------------------------------------------- /ch3/py/f-router.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Literal 3 | from langchain_core.prompts import ChatPromptTemplate 4 | from pydantic import BaseModel, Field 5 | from langchain_openai import ChatOpenAI 6 | from langchain_core.runnables import RunnableLambda 7 | 8 | 9 | # Data model class 10 | class RouteQuery(BaseModel): 11 | """Route a user query to the most relevant datasource.""" 12 | datasource: Literal["python_docs", "js_docs"] = Field( 13 | ..., 14 | description="Given a user question, choose which datasource would be most relevant for answering their question", 15 | ) 16 | 17 | 18 | # Prompt template 19 | # LLM with function call 20 | llm = ChatOpenAI(model="gpt-4o", temperature=0) 21 | 22 | """ 23 | with_structured_output: Model wrapper that returns outputs formatted to match the given schema. 24 | 25 | """ 26 | structured_llm = llm.with_structured_output(RouteQuery) 27 | 28 | # Prompt 29 | system = """You are an expert at routing a user question to the appropriate data source. 
Based on the programming language the question is referring to, route it to the relevant data source.""" 30 | prompt = ChatPromptTemplate.from_messages( 31 | [("system", system), ("human", "{question}")] 32 | ) 33 | 34 | # Define router 35 | router = prompt | structured_llm 36 | 37 | # Run 38 | question = """Why doesn't the following code work: 39 | from langchain_core.prompts 40 | import ChatPromptTemplate 41 | prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"]) 42 | prompt.invoke("french") """ 43 | 44 | result = router.invoke({"question": question}) 45 | print("\nRouting to: ", result) 46 | 47 | """ 48 | Once we extracted the relevant data source, we can pass the value into another function to execute additional logic as required: 49 | """ 50 | 51 | 52 | def choose_route(result): 53 | if "python_docs" in result.datasource.lower(): 54 | return "chain for python_docs" 55 | else: 56 | return "chain for js_docs" 57 | 58 | 59 | full_chain = router | RunnableLambda(choose_route) 60 | 61 | result = full_chain.invoke({"question": question}) 62 | print("\nChoose route: ", result) 63 | -------------------------------------------------------------------------------- /ch3/py/g-semantic-router.py: -------------------------------------------------------------------------------- 1 | from langchain.utils.math import cosine_similarity 2 | from langchain_core.output_parsers import StrOutputParser 3 | from langchain_core.prompts import PromptTemplate 4 | from langchain_core.runnables import chain 5 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 6 | 7 | physics_template = """You are a very smart physics professor. You are great at answering questions about physics in a concise and easy-to-understand manner. When you don't know the answer to a question, you admit that you don't know. Here is a question: {query}""" 8 | math_template = """You are a very good mathematician. You are great at answering math questions. You are so good because you are able to break down hard problems into their component parts, answer the component parts, and then put them together to answer the broader question. 
Here is a question: {query}""" 9 | 10 | # Embed prompts 11 | embeddings = OpenAIEmbeddings() 12 | prompt_templates = [physics_template, math_template] 13 | prompt_embeddings = embeddings.embed_documents(prompt_templates) 14 | 15 | # Route question to prompt 16 | 17 | 18 | @chain 19 | def prompt_router(query): 20 | query_embedding = embeddings.embed_query(query) 21 | similarity = cosine_similarity([query_embedding], prompt_embeddings)[0] 22 | most_similar = prompt_templates[similarity.argmax()] 23 | print("Using MATH" if most_similar == math_template else "Using PHYSICS") 24 | return PromptTemplate.from_template(most_similar) 25 | 26 | 27 | semantic_router = (prompt_router | ChatOpenAI() | StrOutputParser()) 28 | 29 | result = semantic_router.invoke("What's a black hole") 30 | print("\nSemantic router result: ", result) 31 | -------------------------------------------------------------------------------- /ch3/py/h-self-query.py: -------------------------------------------------------------------------------- 1 | # pip install lark 2 | 3 | from langchain.chains.query_constructor.base import AttributeInfo 4 | from langchain.retrievers.self_query.base import SelfQueryRetriever 5 | from langchain_openai import ChatOpenAI 6 | from langchain_community.document_loaders import TextLoader 7 | from langchain_openai import OpenAIEmbeddings 8 | from langchain_text_splitters import RecursiveCharacterTextSplitter 9 | from langchain_postgres.vectorstores import PGVector 10 | from langchain_core.documents import Document 11 | 12 | # See docker command above to launch a postgres instance with pgvector enabled. 13 | connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain" 14 | 15 | docs = [ 16 | Document( 17 | page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose", 18 | metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"}, 19 | ), 20 | Document( 21 | page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...", 22 | metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2}, 23 | ), 24 | Document( 25 | page_content="A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea", 26 | metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6}, 27 | ), 28 | Document( 29 | page_content="A bunch of normal-sized women are supremely wholesome and some men pine after them", 30 | metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3}, 31 | ), 32 | Document( 33 | page_content="Toys come alive and have a blast doing so", 34 | metadata={"year": 1995, "genre": "animated"}, 35 | ), 36 | Document( 37 | page_content="Three men walk into the Zone, three men walk out of the Zone", 38 | metadata={ 39 | "year": 1979, 40 | "director": "Andrei Tarkovsky", 41 | "genre": "thriller", 42 | "rating": 9.9, 43 | }, 44 | ), 45 | ] 46 | 47 | # Create embeddings for the documents 48 | embeddings_model = OpenAIEmbeddings() 49 | 50 | vectorstore = PGVector.from_documents( 51 | docs, embeddings_model, connection=connection) 52 | 53 | # Define the fields for the query 54 | fields = [ 55 | AttributeInfo( 56 | name="genre", 57 | description="The genre of the movie", 58 | type="string or list[string]", 59 | ), 60 | AttributeInfo( 61 | name="year", 62 | description="The year the movie was released", 63 | type="integer", 64 | ), 65 | AttributeInfo( 66 | name="director", 67 | description="The name of the movie director", 68 | type="string", 69 | ), 70 | AttributeInfo( 
71 | name="rating", 72 | description="A 1-10 rating for the movie", 73 | type="float", 74 | ), 75 | ] 76 | 77 | description = "Brief summary of a movie" 78 | llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0) 79 | retriever = SelfQueryRetriever.from_llm(llm, vectorstore, description, fields) 80 | 81 | # This example only specifies a filter 82 | print(retriever.invoke("I want to watch a movie rated higher than 8.5")) 83 | 84 | print('\n') 85 | 86 | # This example specifies multiple filters 87 | print(retriever.invoke( 88 | "What's a highly rated (above 8.5) science fiction film?")) 89 | -------------------------------------------------------------------------------- /ch3/py/i-sql-example.py: -------------------------------------------------------------------------------- 1 | """ 2 | The below example will use a SQLite connection with the Chinook database, which is a sample database that represents a digital media store. Follow these installation steps to create Chinook.db in the same directory as this notebook. You can also download and build the database via the command line: 3 | 4 | ```bash 5 | curl -s https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sql | sqlite3 Chinook.db 6 | 7 | ``` 8 | 9 | Afterwards, place `Chinook.db` in the same directory where this code is running. 10 | 11 | """ 12 | 13 | from langchain_community.tools import QuerySQLDatabaseTool 14 | from langchain_community.utilities import SQLDatabase 15 | from langchain.chains import create_sql_query_chain 16 | # replace this with the connection details of your db 17 | from langchain_openai import ChatOpenAI 18 | 19 | db = SQLDatabase.from_uri("sqlite:///Chinook.db") 20 | print(db.get_usable_table_names()) 21 | llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0) 22 | 23 | # convert question to sql query 24 | write_query = create_sql_query_chain(llm, db) 25 | 26 | # Execute SQL query 27 | execute_query = QuerySQLDatabaseTool(db=db) 28 | 29 | # combined chain = write_query | execute_query 30 | combined_chain = write_query | execute_query 31 | 32 | # run the chain 33 | result = combined_chain.invoke({"question": "How many employees are there?"}) 34 | 35 | print(result) 36 | -------------------------------------------------------------------------------- /ch4/js/a-simple-memory.js: -------------------------------------------------------------------------------- 1 | import { ChatPromptTemplate } from '@langchain/core/prompts'; 2 | import { ChatOpenAI } from '@langchain/openai'; 3 | 4 | const prompt = ChatPromptTemplate.fromMessages([ 5 | [ 6 | 'system', 7 | 'You are a helpful assistant. 
Answer all questions to the best of your ability.', 8 | ], 9 | ['placeholder', '{messages}'], 10 | ]); 11 | const model = new ChatOpenAI(); 12 | const chain = prompt.pipe(model); 13 | 14 | const response = await chain.invoke({ 15 | messages: [ 16 | [ 17 | 'human', 18 | 'Translate this sentence from English to French: I love programming.', 19 | ], 20 | ['ai', "J'adore programmer."], 21 | ['human', 'What did you just say?'], 22 | ], 23 | }); 24 | 25 | console.log(response.content); 26 | -------------------------------------------------------------------------------- /ch4/js/b-state-graph.js: -------------------------------------------------------------------------------- 1 | import { 2 | StateGraph, 3 | Annotation, 4 | messagesStateReducer, 5 | START, 6 | END, 7 | } from '@langchain/langgraph'; 8 | import { ChatOpenAI } from '@langchain/openai'; 9 | import { HumanMessage } from '@langchain/core/messages'; 10 | 11 | const State = { 12 | messages: Annotation({ 13 | reducer: messagesStateReducer, 14 | default: () => [], 15 | }), 16 | }; 17 | 18 | let builder = new StateGraph(State); 19 | 20 | const model = new ChatOpenAI(); 21 | 22 | async function chatbot(state) { 23 | const answer = await model.invoke(state.messages); 24 | return { messages: answer }; 25 | } 26 | 27 | builder = builder.addNode('chatbot', chatbot); 28 | 29 | builder = builder.addEdge(START, 'chatbot').addEdge('chatbot', END); 30 | 31 | let graph = builder.compile(); 32 | 33 | // Run the graph 34 | const input = { messages: [new HumanMessage('hi!')] }; 35 | for await (const chunk of await graph.stream(input)) { 36 | console.log(chunk); 37 | } 38 | -------------------------------------------------------------------------------- /ch4/js/c-persistent-memory.js: -------------------------------------------------------------------------------- 1 | import { 2 | StateGraph, 3 | Annotation, 4 | messagesStateReducer, 5 | START, END, 6 | } from "@langchain/langgraph"; 7 | import { ChatOpenAI } from "@langchain/openai"; 8 | import { MemorySaver } from "@langchain/langgraph"; 9 | import { HumanMessage } from "@langchain/core/messages"; 10 | 11 | const State = { 12 | messages: Annotation({ 13 | reducer: messagesStateReducer, 14 | default: () => [], 15 | }), 16 | }; 17 | 18 | let builder = new StateGraph(State); 19 | 20 | const model = new ChatOpenAI(); 21 | 22 | async function chatbot(state) { 23 | const answer = await model.invoke(state.messages); 24 | return { messages: answer }; 25 | } 26 | 27 | builder = builder.addNode("chatbot", chatbot); 28 | 29 | builder = builder.addEdge(START, "chatbot").addEdge("chatbot", END); 30 | 31 | // Add persistence 32 | const graph = builder.compile({ checkpointer: new MemorySaver() }); 33 | 34 | // Configure thread 35 | const thread1 = { configurable: { thread_id: "1" } }; 36 | 37 | // Run with persistence 38 | const result_1 = await graph.invoke( 39 | { 40 | messages: [new HumanMessage("hi, my name is Jack!")], 41 | }, 42 | thread1, 43 | ); 44 | console.log(result_1); 45 | 46 | const result_2 = await graph.invoke( 47 | { 48 | messages: [new HumanMessage("what is my name?")], 49 | }, 50 | thread1, 51 | ); 52 | console.log(result_2); 53 | 54 | // Get state 55 | await graph.getState(thread1); 56 | -------------------------------------------------------------------------------- /ch4/js/d-trim-messages.js: -------------------------------------------------------------------------------- 1 | import { 2 | AIMessage, 3 | HumanMessage, 4 | SystemMessage, 5 | trimMessages, 6 | } from "@langchain/core/messages"; 7 | 
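// Note: trimMessages keeps only as much recent history as fits the token budget.
// With strategy "last", maxTokens 65, includeSystem true, and startOn "human",
// the trimmer keeps the system message plus the most recent messages, starting
// on a human turn, so the model always sees a well-formed conversation.
// The ChatOpenAI instance passed as tokenCounter is used only to count tokens.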
import { ChatOpenAI } from "@langchain/openai"; 8 | 9 | const messages = [ 10 | new SystemMessage("you're a good assistant"), 11 | new HumanMessage("hi! I'm bob"), 12 | new AIMessage("hi!"), 13 | new HumanMessage("I like vanilla ice cream"), 14 | new AIMessage("nice"), 15 | new HumanMessage("whats 2 + 2"), 16 | new AIMessage("4"), 17 | new HumanMessage("thanks"), 18 | new AIMessage("no problem!"), 19 | new HumanMessage("having fun?"), 20 | new AIMessage("yes!"), 21 | ]; 22 | 23 | const trimmer = trimMessages({ 24 | maxTokens: 65, 25 | strategy: "last", 26 | tokenCounter: new ChatOpenAI({ modelName: "gpt-4o" }), 27 | includeSystem: true, 28 | allowPartial: false, 29 | startOn: "human", 30 | }); 31 | 32 | const trimmed = await trimmer.invoke(messages); 33 | console.log(trimmed); 34 | -------------------------------------------------------------------------------- /ch4/js/e-filter-messages.js: -------------------------------------------------------------------------------- 1 | import { 2 | HumanMessage, 3 | SystemMessage, 4 | AIMessage, 5 | filterMessages, 6 | } from '@langchain/core/messages'; 7 | 8 | const messages = [ 9 | new SystemMessage({ content: 'you are a good assistant', id: '1' }), 10 | new HumanMessage({ content: 'example input', id: '2', name: 'example_user' }), 11 | new AIMessage({ 12 | content: 'example output', 13 | id: '3', 14 | name: 'example_assistant', 15 | }), 16 | new HumanMessage({ content: 'real input', id: '4', name: 'bob' }), 17 | new AIMessage({ content: 'real output', id: '5', name: 'alice' }), 18 | ]; 19 | 20 | // Filter for human messages 21 | const filterByHumanMessages = filterMessages(messages, { 22 | includeTypes: ['human'], 23 | }); 24 | console.log(`Human messages: ${JSON.stringify(filterByHumanMessages)}`); 25 | 26 | // Filter to exclude names 27 | const filterByExcludedNames = filterMessages(messages, { 28 | excludeNames: ['example_user', 'example_assistant'], 29 | }); 30 | console.log( 31 | `\nExcluding example names: ${JSON.stringify(filterByExcludedNames)}` 32 | ); 33 | 34 | // Filter by types and IDs 35 | const filterByTypesAndIDs = filterMessages(messages, { 36 | includeTypes: ['human', 'ai'], 37 | excludeIds: ['3'], 38 | }); 39 | console.log( 40 | `\nFiltered by types and IDs: ${JSON.stringify(filterByTypesAndIDs)}` 41 | ); 42 | -------------------------------------------------------------------------------- /ch4/js/f-merge-messages.js: -------------------------------------------------------------------------------- 1 | import { 2 | HumanMessage, 3 | SystemMessage, 4 | AIMessage, 5 | mergeMessageRuns, 6 | } from '@langchain/core/messages'; 7 | 8 | const messages = [ 9 | new SystemMessage("you're a good assistant."), 10 | new SystemMessage('you always respond with a joke.'), 11 | new HumanMessage({ 12 | content: [{ type: 'text', text: "i wonder why it's called langchain" }], 13 | }), 14 | new HumanMessage('and who is harrison chasing anyways'), 15 | new AIMessage( 16 | 'Well, I guess they thought "WordRope" and "SentenceString" just didn\'t have the same ring to it!' 17 | ), 18 | new AIMessage( 19 | "Why, he's probably chasing after the last cup of coffee in the office!" 
20 | ), 21 | ]; 22 | 23 | // Merge consecutive messages 24 | const mergedMessages = mergeMessageRuns(messages); 25 | console.log(mergedMessages); 26 | -------------------------------------------------------------------------------- /ch4/py/a-simple-memory.py: -------------------------------------------------------------------------------- 1 | from langchain_core.prompts import ChatPromptTemplate 2 | from langchain_openai import ChatOpenAI 3 | 4 | prompt = ChatPromptTemplate.from_messages([ 5 | ("system", "You are a helpful assistant. Answer all questions to the best of your ability."), 6 | ("placeholder", "{messages}"), 7 | ]) 8 | 9 | model = ChatOpenAI() 10 | 11 | chain = prompt | model 12 | 13 | response = chain.invoke({ 14 | "messages": [ 15 | ("human", "Translate this sentence from English to French: I love programming."), 16 | ("ai", "J'adore programmer."), 17 | ("human", "What did you just say?"), 18 | ], 19 | }) 20 | 21 | print(response.content) 22 | -------------------------------------------------------------------------------- /ch4/py/b-state-graph.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, TypedDict 2 | 3 | from langchain_core.messages import HumanMessage 4 | from langchain_openai import ChatOpenAI 5 | from langgraph.graph import StateGraph, START, END, add_messages 6 | from langgraph.checkpoint.memory import MemorySaver 7 | 8 | 9 | class State(TypedDict): 10 | messages: Annotated[list, add_messages] 11 | 12 | 13 | builder = StateGraph(State) 14 | 15 | model = ChatOpenAI() 16 | 17 | 18 | def chatbot(state: State): 19 | answer = model.invoke(state["messages"]) 20 | return {"messages": [answer]} 21 | 22 | 23 | # Add the chatbot node 24 | builder.add_node("chatbot", chatbot) 25 | 26 | # Add edges 27 | builder.add_edge(START, "chatbot") 28 | builder.add_edge("chatbot", END) 29 | 30 | graph = builder.compile() 31 | 32 | # Run the graph 33 | input = {"messages": [HumanMessage("hi!")]} 34 | for chunk in graph.stream(input): 35 | print(chunk) 36 | -------------------------------------------------------------------------------- /ch4/py/c-persistent-memory.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, TypedDict 2 | 3 | from langchain_core.messages import HumanMessage 4 | from langchain_openai import ChatOpenAI 5 | from langgraph.graph import StateGraph, START, END, add_messages 6 | from langgraph.checkpoint.memory import MemorySaver 7 | 8 | 9 | class State(TypedDict): 10 | messages: Annotated[list, add_messages] 11 | 12 | 13 | builder = StateGraph(State) 14 | 15 | model = ChatOpenAI() 16 | 17 | 18 | def chatbot(state: State): 19 | answer = model.invoke(state["messages"]) 20 | return {"messages": [answer]} 21 | 22 | 23 | builder.add_node("chatbot", chatbot) 24 | builder.add_edge(START, "chatbot") 25 | builder.add_edge("chatbot", END) 26 | 27 | # Add persistence with MemorySaver 28 | graph = builder.compile(checkpointer=MemorySaver()) 29 | 30 | # Configure thread 31 | thread1 = {"configurable": {"thread_id": "1"}} 32 | 33 | # Run with persistence 34 | result_1 = graph.invoke({"messages": [HumanMessage("hi, my name is Jack!")]}, thread1) 35 | print(result_1) 36 | 37 | result_2 = graph.invoke({"messages": [HumanMessage("what is my name?")]}, thread1) 38 | print(result_2) 39 | 40 | # Get state 41 | print(graph.get_state(thread1)) 42 | -------------------------------------------------------------------------------- /ch4/py/d-trim-messages.py: 
-------------------------------------------------------------------------------- 1 | from langchain_core.messages import ( 2 | SystemMessage, 3 | HumanMessage, 4 | AIMessage, 5 | trim_messages, 6 | ) 7 | from langchain_openai import ChatOpenAI 8 | 9 | # Define sample messages 10 | messages = [ 11 | SystemMessage(content="you're a good assistant"), 12 | HumanMessage(content="hi! I'm bob"), 13 | AIMessage(content="hi!"), 14 | HumanMessage(content="I like vanilla ice cream"), 15 | AIMessage(content="nice"), 16 | HumanMessage(content="whats 2 + 2"), 17 | AIMessage(content="4"), 18 | HumanMessage(content="thanks"), 19 | AIMessage(content="no problem!"), 20 | HumanMessage(content="having fun?"), 21 | AIMessage(content="yes!"), 22 | ] 23 | 24 | # Create trimmer 25 | trimmer = trim_messages( 26 | max_tokens=65, 27 | strategy="last", 28 | token_counter=ChatOpenAI(model="gpt-4o"), 29 | include_system=True, 30 | allow_partial=False, 31 | start_on="human", 32 | ) 33 | 34 | # Apply trimming 35 | trimmed = trimmer.invoke(messages) 36 | print(trimmed) 37 | -------------------------------------------------------------------------------- /ch4/py/e-filter-messages.py: -------------------------------------------------------------------------------- 1 | from langchain_core.messages import ( 2 | AIMessage, 3 | HumanMessage, 4 | SystemMessage, 5 | filter_messages, 6 | ) 7 | 8 | # Sample messages 9 | messages = [ 10 | SystemMessage(content="you are a good assistant", id="1"), 11 | HumanMessage(content="example input", id="2", name="example_user"), 12 | AIMessage(content="example output", id="3", name="example_assistant"), 13 | HumanMessage(content="real input", id="4", name="bob"), 14 | AIMessage(content="real output", id="5", name="alice"), 15 | ] 16 | 17 | # Filter for human messages 18 | human_messages = filter_messages(messages, include_types="human") 19 | print("Human messages:", human_messages) 20 | 21 | # Filter to exclude certain names 22 | excluded_names = filter_messages( 23 | messages, exclude_names=["example_user", "example_assistant"] 24 | ) 25 | print("\nExcluding example names:", excluded_names) 26 | 27 | # Filter by types and IDs 28 | filtered_messages = filter_messages( 29 | messages, include_types=["human", "ai"], exclude_ids=["3"] 30 | ) 31 | print("\nFiltered by types and IDs:", filtered_messages) 32 | -------------------------------------------------------------------------------- /ch4/py/f-merge-messages.py: -------------------------------------------------------------------------------- 1 | from langchain_core.messages import ( 2 | AIMessage, 3 | HumanMessage, 4 | SystemMessage, 5 | merge_message_runs, 6 | ) 7 | 8 | # Sample messages with consecutive messages of same type 9 | messages = [ 10 | SystemMessage(content="you're a good assistant."), 11 | SystemMessage(content="you always respond with a joke."), 12 | HumanMessage( 13 | content=[{"type": "text", "text": "i wonder why it's called langchain"}] 14 | ), 15 | HumanMessage(content="and who is harrison chasing anyways"), 16 | AIMessage( 17 | content='Well, I guess they thought "WordRope" and "SentenceString" just didn\'t have the same ring to it!' 18 | ), 19 | AIMessage( 20 | content="Why, he's probably chasing after the last cup of coffee in the office!" 
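# Note: the two SystemMessages, two HumanMessages, and two AIMessages above are
# consecutive runs of the same message type, so merge_message_runs below
# collapses each run into a single message.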
21 | ), 22 | ] 23 | 24 | # Merge consecutive messages 25 | merged = merge_message_runs(messages) 26 | print(merged) 27 | -------------------------------------------------------------------------------- /ch5/js/a-chatbot.js: -------------------------------------------------------------------------------- 1 | import { 2 | StateGraph, 3 | Annotation, 4 | messagesStateReducer, 5 | START, 6 | END, 7 | } from '@langchain/langgraph'; 8 | 9 | import { ChatOpenAI } from '@langchain/openai'; 10 | import { HumanMessage } from '@langchain/core/messages'; 11 | 12 | const model = new ChatOpenAI(); 13 | 14 | const State = { 15 | // Messages have the type "list". The `add_messages` 16 | // function in the annotation defines how this state should 17 | // be updated (in this case, it appends new messages to the 18 | // list, rather than replacing the previous messages) 19 | messages: Annotation({ 20 | reducer: messagesStateReducer, 21 | default: () => [], 22 | }), 23 | }; 24 | 25 | async function chatbot(state) { 26 | const answer = await model.invoke(state.messages); 27 | return { messages: answer }; 28 | } 29 | 30 | const builder = new StateGraph(State) 31 | .addNode('chatbot', chatbot) 32 | .addEdge(START, 'chatbot') 33 | .addEdge('chatbot', END); 34 | 35 | const graph = builder.compile(); 36 | 37 | // Example usage 38 | const input = { messages: [new HumanMessage('hi!')] }; 39 | for await (const chunk of await graph.stream(input)) { 40 | console.log(chunk); 41 | } 42 | -------------------------------------------------------------------------------- /ch5/js/b-sql-generator.js: -------------------------------------------------------------------------------- 1 | import { HumanMessage, SystemMessage } from "@langchain/core/messages"; 2 | import { ChatOpenAI } from "@langchain/openai"; 3 | import { 4 | StateGraph, 5 | Annotation, 6 | messagesStateReducer, 7 | START, 8 | END, 9 | } from "@langchain/langgraph"; 10 | 11 | // useful to generate SQL query 12 | const modelLowTemp = new ChatOpenAI({ temperature: 0.1 }); 13 | // useful to generate natural language outputs 14 | const modelHighTemp = new ChatOpenAI({ temperature: 0.7 }); 15 | 16 | const annotation = Annotation.Root({ 17 | messages: Annotation({ reducer: messagesStateReducer, default: () => [] }), 18 | user_query: Annotation(), 19 | sql_query: Annotation(), 20 | sql_explanation: Annotation(), 21 | }); 22 | 23 | const generatePrompt = new SystemMessage( 24 | "You are a helpful data analyst, who generates SQL queries for users based on their questions.", 25 | ); 26 | 27 | async function generateSql(state) { 28 | const userMessage = new HumanMessage(state.user_query); 29 | const messages = [generatePrompt, ...state.messages, userMessage]; 30 | const res = await modelLowTemp.invoke(messages); 31 | return { 32 | sql_query: res.content, 33 | // update conversation history 34 | messages: [userMessage, res], 35 | }; 36 | } 37 | 38 | const explainPrompt = new SystemMessage( 39 | "You are a helpful data analyst, who explains SQL queries to users.", 40 | ); 41 | 42 | async function explainSql(state) { 43 | const messages = [explainPrompt, ...state.messages]; 44 | const res = await modelHighTemp.invoke(messages); 45 | return { 46 | sql_explanation: res.content, 47 | // update conversation history 48 | messages: res, 49 | }; 50 | } 51 | 52 | const builder = new StateGraph(annotation) 53 | .addNode("generate_sql", generateSql) 54 | .addNode("explain_sql", explainSql) 55 | .addEdge(START, "generate_sql") 56 | .addEdge("generate_sql", "explain_sql") 57 | 
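// The graph is a straight pipeline: generate_sql writes the SQL query and appends
// it to the shared message history, then explain_sql reads that history to produce
// a natural-language explanation before the run ends.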
.addEdge("explain_sql", END); 58 | 59 | const graph = builder.compile(); 60 | 61 | // Example usage 62 | const result = await graph.invoke({ 63 | user_query: "What is the total sales for each product?", 64 | }); 65 | console.log(result); 66 | -------------------------------------------------------------------------------- /ch5/py/a-chatbot.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, TypedDict 2 | 3 | from langgraph.graph import StateGraph, START, END 4 | from langgraph.graph.message import add_messages 5 | from langchain_openai import ChatOpenAI 6 | from langchain_core.messages import HumanMessage 7 | 8 | model = ChatOpenAI() 9 | 10 | 11 | class State(TypedDict): 12 | # Messages have the type "list". The `add_messages` 13 | # function in the annotation defines how this state should 14 | # be updated (in this case, it appends new messages to the 15 | # list, rather than replacing the previous messages) 16 | messages: Annotated[list, add_messages] 17 | 18 | 19 | def chatbot(state: State): 20 | answer = model.invoke(state["messages"]) 21 | return {"messages": [answer]} 22 | 23 | 24 | builder = StateGraph(State) 25 | 26 | builder.add_node("chatbot", chatbot) 27 | 28 | builder.add_edge(START, "chatbot") 29 | builder.add_edge("chatbot", END) 30 | 31 | graph = builder.compile() 32 | 33 | # Example usage 34 | 35 | input = {"messages": [HumanMessage("hi!")]} 36 | for chunk in graph.stream(input): 37 | print(chunk) 38 | -------------------------------------------------------------------------------- /ch5/py/b-sql-generator.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, TypedDict 2 | 3 | from langchain_core.messages import HumanMessage, SystemMessage 4 | from langchain_openai import ChatOpenAI 5 | from langgraph.graph import END, START, StateGraph 6 | from langgraph.graph.message import add_messages 7 | 8 | # useful to generate SQL query 9 | model_low_temp = ChatOpenAI(temperature=0.1) 10 | # useful to generate natural language outputs 11 | model_high_temp = ChatOpenAI(temperature=0.7) 12 | 13 | 14 | class State(TypedDict): 15 | # to track conversation history 16 | messages: Annotated[list, add_messages] 17 | # input 18 | user_query: str 19 | # output 20 | sql_query: str 21 | sql_explanation: str 22 | 23 | 24 | class Input(TypedDict): 25 | user_query: str 26 | 27 | 28 | class Output(TypedDict): 29 | sql_query: str 30 | sql_explanation: str 31 | 32 | 33 | generate_prompt = SystemMessage( 34 | "You are a helpful data analyst, who generates SQL queries for users based on their questions." 35 | ) 36 | 37 | 38 | def generate_sql(state: State) -> State: 39 | user_message = HumanMessage(state["user_query"]) 40 | messages = [generate_prompt, *state["messages"], user_message] 41 | res = model_low_temp.invoke(messages) 42 | return { 43 | "sql_query": res.content, 44 | # update conversation history 45 | "messages": [user_message, res], 46 | } 47 | 48 | 49 | explain_prompt = SystemMessage( 50 | "You are a helpful data analyst, who explains SQL queries to users." 
51 | ) 52 | 53 | 54 | def explain_sql(state: State) -> State: 55 | messages = [ 56 | explain_prompt, 57 | # contains user's query and SQL query from prev step 58 | *state["messages"], 59 | ] 60 | res = model_high_temp.invoke(messages) 61 | return { 62 | "sql_explanation": res.content, 63 | # update conversation history 64 | "messages": res, 65 | } 66 | 67 | 68 | builder = StateGraph(State, input=Input, output=Output) 69 | builder.add_node("generate_sql", generate_sql) 70 | builder.add_node("explain_sql", explain_sql) 71 | builder.add_edge(START, "generate_sql") 72 | builder.add_edge("generate_sql", "explain_sql") 73 | builder.add_edge("explain_sql", END) 74 | 75 | graph = builder.compile() 76 | 77 | # Example usage 78 | result = graph.invoke({"user_query": "What is the total sales for each product?"}) 79 | print(result) 80 | -------------------------------------------------------------------------------- /ch6/js/a-basic-agent.js: -------------------------------------------------------------------------------- 1 | import { DuckDuckGoSearch } from "@langchain/community/tools/duckduckgo_search"; 2 | import { Calculator } from "@langchain/community/tools/calculator"; 3 | import { 4 | StateGraph, 5 | Annotation, 6 | messagesStateReducer, 7 | START, 8 | } from "@langchain/langgraph"; 9 | import { ToolNode, toolsCondition } from "@langchain/langgraph/prebuilt"; 10 | import { ChatOpenAI } from "@langchain/openai"; 11 | import { HumanMessage } from "@langchain/core/messages"; 12 | 13 | const search = new DuckDuckGoSearch(); 14 | const calculator = new Calculator(); 15 | const tools = [search, calculator]; 16 | const model = new ChatOpenAI({ 17 | temperature: 0.1, 18 | }).bindTools(tools); 19 | 20 | const annotation = Annotation.Root({ 21 | messages: Annotation({ 22 | reducer: messagesStateReducer, 23 | default: () => [], 24 | }), 25 | }); 26 | 27 | async function modelNode(state) { 28 | const res = await model.invoke(state.messages); 29 | return { messages: res }; 30 | } 31 | 32 | const builder = new StateGraph(annotation) 33 | .addNode("model", modelNode) 34 | .addNode("tools", new ToolNode(tools)) 35 | .addEdge(START, "model") 36 | .addConditionalEdges("model", toolsCondition) 37 | .addEdge("tools", "model"); 38 | 39 | const graph = builder.compile(); 40 | 41 | // Example usage 42 | const input = { 43 | messages: [ 44 | new HumanMessage( 45 | "How old was the 30th president of the United States when he died?", 46 | ), 47 | ], 48 | }; 49 | 50 | for await (const c of await graph.stream(input)) { 51 | console.log(c); 52 | } 53 | -------------------------------------------------------------------------------- /ch6/js/b-force-first-tool.js: -------------------------------------------------------------------------------- 1 | import { DuckDuckGoSearch } from "@langchain/community/tools/duckduckgo_search"; 2 | import { Calculator } from "@langchain/community/tools/calculator"; 3 | import { AIMessage, HumanMessage } from "@langchain/core/messages"; 4 | import { 5 | StateGraph, 6 | Annotation, 7 | messagesStateReducer, 8 | START, 9 | } from "@langchain/langgraph"; 10 | import { ToolNode, toolsCondition } from "@langchain/langgraph/prebuilt"; 11 | import { ChatOpenAI } from "@langchain/openai"; 12 | 13 | const search = new DuckDuckGoSearch(); 14 | const calculator = new Calculator(); 15 | const tools = [search, calculator]; 16 | const model = new ChatOpenAI({ temperature: 0.1 }).bindTools(tools); 17 | 18 | const annotation = Annotation.Root({ 19 | messages: Annotation({ reducer: messagesStateReducer, 
default: () => [] }), 20 | }); 21 | 22 | async function firstModelNode(state) { 23 | const query = state.messages[state.messages.length - 1].content; 24 | const searchToolCall = { 25 | name: "duckduckgo_search", 26 | args: { query }, 27 | id: Math.random().toString(), 28 | }; 29 | return { 30 | messages: [new AIMessage({ content: "", tool_calls: [searchToolCall] })], 31 | }; 32 | } 33 | 34 | async function modelNode(state) { 35 | const res = await model.invoke(state.messages); 36 | return { messages: res }; 37 | } 38 | 39 | const builder = new StateGraph(annotation) 40 | .addNode("first_model", firstModelNode) 41 | .addNode("model", modelNode) 42 | .addNode("tools", new ToolNode(tools)) 43 | .addEdge(START, "first_model") 44 | .addEdge("first_model", "tools") 45 | .addEdge("tools", "model") 46 | .addConditionalEdges("model", toolsCondition); 47 | 48 | const graph = builder.compile(); 49 | 50 | // Example usage 51 | const input = { 52 | messages: [ 53 | new HumanMessage( 54 | "How old was the 30th president of the United States when he died?", 55 | ), 56 | ], 57 | }; 58 | 59 | for await (const c of await graph.stream(input)) { 60 | console.log(c); 61 | } 62 | -------------------------------------------------------------------------------- /ch6/js/c-many-tools.js: -------------------------------------------------------------------------------- 1 | import { DuckDuckGoSearch } from '@langchain/community/tools/duckduckgo_search'; 2 | import { Calculator } from '@langchain/community/tools/calculator'; 3 | import { ChatOpenAI } from '@langchain/openai'; 4 | import { OpenAIEmbeddings } from '@langchain/openai'; 5 | import { Document } from '@langchain/core/documents'; 6 | import { MemoryVectorStore } from 'langchain/vectorstores/memory'; 7 | import { 8 | StateGraph, 9 | Annotation, 10 | messagesStateReducer, 11 | START, 12 | } from '@langchain/langgraph'; 13 | import { ToolNode, toolsCondition } from '@langchain/langgraph/prebuilt'; 14 | import { HumanMessage } from '@langchain/core/messages'; 15 | 16 | const search = new DuckDuckGoSearch(); 17 | const calculator = new Calculator(); 18 | const tools = [search, calculator]; 19 | 20 | const embeddings = new OpenAIEmbeddings(); 21 | const model = new ChatOpenAI({ temperature: 0.1 }); 22 | 23 | // Create vector store and retriever 24 | const toolsStore = await MemoryVectorStore.fromDocuments( 25 | tools.map( 26 | (tool) => 27 | new Document({ 28 | pageContent: tool.description, 29 | metadata: { name: tool.constructor.name }, 30 | }) 31 | ), 32 | embeddings 33 | ); 34 | const toolsRetriever = toolsStore.asRetriever(); 35 | 36 | const annotation = Annotation.Root({ 37 | messages: Annotation({ reducer: messagesStateReducer, default: () => [] }), 38 | selected_tools: Annotation(), 39 | }); 40 | 41 | async function modelNode(state) { 42 | const selectedTools = tools.filter((tool) => 43 | state.selected_tools.includes(tool.constructor.name) 44 | ); 45 | const res = await model.bindTools(selectedTools).invoke(state.messages); 46 | return { messages: res }; 47 | } 48 | 49 | async function selectTools(state) { 50 | const query = state.messages[state.messages.length - 1].content; 51 | const toolDocs = await toolsRetriever.invoke(query); 52 | return { 53 | selected_tools: toolDocs.map((doc) => doc.metadata.name), 54 | }; 55 | } 56 | 57 | const builder = new StateGraph(annotation) 58 | .addNode('select_tools', selectTools) 59 | .addNode('model', modelNode) 60 | .addNode('tools', new ToolNode(tools)) 61 | .addEdge(START, 'select_tools') 62 | 
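// select_tools runs first: it embeds the latest user message, retrieves the most
// relevant tool descriptions from the vector store, and stores their names in
// state.selected_tools, so modelNode only binds that subset of tools to the LLM.
// For the example question below, the search tool (and possibly the calculator)
// would likely be selected.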
.addEdge('select_tools', 'model') 63 | .addConditionalEdges('model', toolsCondition) 64 | .addEdge('tools', 'model'); 65 | 66 | const graph = builder.compile(); 67 | 68 | // Example usage 69 | const input = { 70 | messages: [ 71 | new HumanMessage( 72 | 'How old was the 30th president of the United States when he died?' 73 | ), 74 | ], 75 | }; 76 | 77 | for await (const c of await graph.stream(input)) { 78 | console.log(c); 79 | } 80 | -------------------------------------------------------------------------------- /ch6/py/a-basic-agent.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from typing import Annotated, TypedDict 3 | 4 | from langchain_community.tools import DuckDuckGoSearchRun 5 | from langchain_core.messages import HumanMessage 6 | from langchain_core.tools import tool 7 | from langchain_openai import ChatOpenAI 8 | from langgraph.graph import START, StateGraph 9 | from langgraph.graph.message import add_messages 10 | from langgraph.prebuilt import ToolNode, tools_condition 11 | 12 | 13 | @tool 14 | def calculator(query: str) -> str: 15 | """A simple calculator tool. Input should be a mathematical expression.""" 16 | return ast.literal_eval(query) 17 | 18 | 19 | search = DuckDuckGoSearchRun() 20 | tools = [search, calculator] 21 | model = ChatOpenAI(temperature=0.1).bind_tools(tools) 22 | 23 | 24 | class State(TypedDict): 25 | messages: Annotated[list, add_messages] 26 | 27 | 28 | def model_node(state: State) -> State: 29 | res = model.invoke(state["messages"]) 30 | return {"messages": res} 31 | 32 | 33 | builder = StateGraph(State) 34 | builder.add_node("model", model_node) 35 | builder.add_node("tools", ToolNode(tools)) 36 | builder.add_edge(START, "model") 37 | builder.add_conditional_edges("model", tools_condition) 38 | builder.add_edge("tools", "model") 39 | 40 | graph = builder.compile() 41 | 42 | # Example usage 43 | 44 | input = { 45 | "messages": [ 46 | HumanMessage( 47 | "How old was the 30th president of the United States when he died?" 48 | ) 49 | ] 50 | } 51 | 52 | for c in graph.stream(input): 53 | print(c) 54 | -------------------------------------------------------------------------------- /ch6/py/b-force-first-tool.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from typing import Annotated, TypedDict 3 | from uuid import uuid4 4 | 5 | from langchain_community.tools import DuckDuckGoSearchRun 6 | from langchain_core.messages import AIMessage, HumanMessage, ToolCall 7 | from langchain_core.tools import tool 8 | from langchain_openai import ChatOpenAI 9 | 10 | from langgraph.graph import START, StateGraph 11 | from langgraph.graph.message import add_messages 12 | from langgraph.prebuilt import ToolNode, tools_condition 13 | 14 | 15 | @tool 16 | def calculator(query: str) -> str: 17 | """A simple calculator tool. 
Input should be a mathematical expression.""" 18 | return ast.literal_eval(query) 19 | 20 | 21 | search = DuckDuckGoSearchRun() 22 | tools = [search, calculator] 23 | model = ChatOpenAI(temperature=0.1).bind_tools(tools) 24 | 25 | 26 | class State(TypedDict): 27 | messages: Annotated[list, add_messages] 28 | 29 | 30 | def model_node(state: State) -> State: 31 | res = model.invoke(state["messages"]) 32 | return {"messages": res} 33 | 34 | 35 | def first_model(state: State) -> State: 36 | query = state["messages"][-1].content 37 | search_tool_call = ToolCall( 38 | name="duckduckgo_search", args={"query": query}, id=uuid4().hex 39 | ) 40 | return {"messages": AIMessage(content="", tool_calls=[search_tool_call])} 41 | 42 | 43 | builder = StateGraph(State) 44 | builder.add_node("first_model", first_model) 45 | builder.add_node("model", model_node) 46 | builder.add_node("tools", ToolNode(tools)) 47 | builder.add_edge(START, "first_model") 48 | builder.add_edge("first_model", "tools") 49 | builder.add_conditional_edges("model", tools_condition) 50 | builder.add_edge("tools", "model") 51 | 52 | graph = builder.compile() 53 | 54 | # Example usage 55 | input = { 56 | "messages": [ 57 | HumanMessage( 58 | "How old was the 30th president of the United States when he died?" 59 | ) 60 | ] 61 | } 62 | 63 | for c in graph.stream(input): 64 | print(c) 65 | -------------------------------------------------------------------------------- /ch6/py/c-many-tools.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from typing import Annotated, TypedDict 3 | 4 | from langchain_community.tools import DuckDuckGoSearchRun 5 | from langchain_core.documents import Document 6 | from langchain_core.messages import HumanMessage 7 | from langchain_core.tools import tool 8 | from langchain_core.vectorstores.in_memory import InMemoryVectorStore 9 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 10 | 11 | from langgraph.graph import START, StateGraph 12 | from langgraph.graph.message import add_messages 13 | from langgraph.prebuilt import ToolNode, tools_condition 14 | 15 | 16 | @tool 17 | def calculator(query: str) -> str: 18 | """A simple calculator tool. 
Input should be a mathematical expression.""" 19 | return ast.literal_eval(query) 20 | 21 | 22 | search = DuckDuckGoSearchRun() 23 | tools = [search, calculator] 24 | 25 | embeddings = OpenAIEmbeddings() 26 | model = ChatOpenAI(temperature=0.1) 27 | 28 | tools_retriever = InMemoryVectorStore.from_documents( 29 | [Document(tool.description, metadata={"name": tool.name}) for tool in tools], 30 | embeddings, 31 | ).as_retriever() 32 | 33 | 34 | class State(TypedDict): 35 | messages: Annotated[list, add_messages] 36 | selected_tools: list[str] 37 | 38 | 39 | def model_node(state: State) -> State: 40 | selected_tools = [tool for tool in tools if tool.name in state["selected_tools"]] 41 | res = model.bind_tools(selected_tools).invoke(state["messages"]) 42 | return {"messages": res} 43 | 44 | 45 | def select_tools(state: State) -> State: 46 | query = state["messages"][-1].content 47 | tool_docs = tools_retriever.invoke(query) 48 | return {"selected_tools": [doc.metadata["name"] for doc in tool_docs]} 49 | 50 | 51 | builder = StateGraph(State) 52 | builder.add_node("select_tools", select_tools) 53 | builder.add_node("model", model_node) 54 | builder.add_node("tools", ToolNode(tools)) 55 | builder.add_edge(START, "select_tools") 56 | builder.add_edge("select_tools", "model") 57 | builder.add_conditional_edges("model", tools_condition) 58 | builder.add_edge("tools", "model") 59 | 60 | graph = builder.compile() 61 | 62 | # Example usage 63 | input = { 64 | "messages": [ 65 | HumanMessage( 66 | "How old was the 30th president of the United States when he died?" 67 | ) 68 | ] 69 | } 70 | 71 | for c in graph.stream(input): 72 | print(c) 73 | -------------------------------------------------------------------------------- /ch7/js/a-reflection.js: -------------------------------------------------------------------------------- 1 | import { 2 | AIMessage, 3 | SystemMessage, 4 | HumanMessage, 5 | } from '@langchain/core/messages'; 6 | import { ChatOpenAI } from '@langchain/openai'; 7 | import { 8 | StateGraph, 9 | Annotation, 10 | messagesStateReducer, 11 | START, 12 | END, 13 | } from '@langchain/langgraph'; 14 | 15 | const model = new ChatOpenAI(); 16 | 17 | const annotation = Annotation.Root({ 18 | messages: Annotation({ reducer: messagesStateReducer, default: () => [] }), 19 | }); 20 | 21 | const generatePrompt = new SystemMessage( 22 | `You are an essay assistant tasked with writing excellent 3-paragraph essays. 23 | Generate the best essay possible for the user's request. 24 | If the user provides critique, respond with a revised version of your previous attempts.` 25 | ); 26 | 27 | async function generate(state) { 28 | const answer = await model.invoke([generatePrompt, ...state.messages]); 29 | return { messages: [answer] }; 30 | } 31 | 32 | const reflectionPrompt = new SystemMessage( 33 | `You are a teacher grading an essay submission. Generate critique and recommendations for the user's submission. 34 | Provide detailed recommendations, including requests for length, depth, style, etc.` 35 | ); 36 | 37 | async function reflect(state) { 38 | // Invert the messages to get the LLM to reflect on its own output 39 | const clsMap = { 40 | ai: HumanMessage, 41 | human: AIMessage, 42 | }; 43 | // First message is the original user request. 
We hold it the same for all nodes 44 | const translated = [ 45 | reflectionPrompt, 46 | state.messages[0], 47 | ...state.messages 48 | .slice(1) 49 | .map((msg) => new clsMap[msg._getType()](msg.content)), 50 | ]; 51 | const answer = await model.invoke(translated); 52 | // We treat the output of this as human feedback for the generator 53 | return { messages: [new HumanMessage({ content: answer.content })] }; 54 | } 55 | 56 | function shouldContinue(state) { 57 | if (state.messages.length > 6) { 58 | // End after 3 iterations, each with 2 messages 59 | return END; 60 | } else { 61 | return 'reflect'; 62 | } 63 | } 64 | 65 | const builder = new StateGraph(annotation) 66 | .addNode('generate', generate) 67 | .addNode('reflect', reflect) 68 | .addEdge(START, 'generate') 69 | .addConditionalEdges('generate', shouldContinue) 70 | .addEdge('reflect', 'generate'); 71 | 72 | const graph = builder.compile(); 73 | 74 | // Example usage 75 | const initialState = { 76 | messages: [ 77 | new HumanMessage( 78 | "Write an essay about the relevance of 'The Little Prince' today." 79 | ), 80 | ], 81 | }; 82 | 83 | for await (const output of await graph.stream(initialState)) { 84 | const messageType = output.generate ? 'generate' : 'reflect'; 85 | console.log( 86 | '\nNew message:', 87 | output[messageType].messages[ 88 | output[messageType].messages.length - 1 89 | ].content.slice(0, 100), 90 | '...' 91 | ); 92 | } 93 | -------------------------------------------------------------------------------- /ch7/js/b-subgraph-direct.js: -------------------------------------------------------------------------------- 1 | import { StateGraph, START, Annotation } from '@langchain/langgraph'; 2 | 3 | const StateAnnotation = Annotation.Root({ 4 | foo: Annotation(), // string type 5 | }); 6 | 7 | const SubgraphStateAnnotation = Annotation.Root({ 8 | foo: Annotation(), // shared with parent graph state 9 | bar: Annotation(), 10 | }); 11 | 12 | // Define subgraph 13 | const subgraphNode = async (state) => { 14 | // note that this subgraph node can communicate with 15 | // the parent graph via the shared "foo" key 16 | return { foo: state.foo + 'bar' }; 17 | }; 18 | 19 | const subgraph = new StateGraph(SubgraphStateAnnotation) 20 | .addNode('subgraph', subgraphNode) 21 | .addEdge(START, 'subgraph') 22 | // Additional subgraph setup would go here 23 | .compile(); 24 | 25 | // Define parent graph 26 | const parentGraph = new StateGraph(StateAnnotation) 27 | .addNode('subgraph', subgraph) 28 | .addEdge(START, 'subgraph') 29 | // Additional parent graph setup would go here 30 | .compile(); 31 | 32 | // Example usage 33 | const initialState = { foo: 'hello' }; 34 | const result = await parentGraph.invoke(initialState); 35 | console.log(`Result: ${JSON.stringify(result)}`); // Should append "bar" to the foo value 36 | -------------------------------------------------------------------------------- /ch7/js/c-subgraph-function.js: -------------------------------------------------------------------------------- 1 | import { StateGraph, START, Annotation } from '@langchain/langgraph'; 2 | 3 | const StateAnnotation = Annotation.Root({ 4 | foo: Annotation(), 5 | }); 6 | 7 | const SubgraphStateAnnotation = Annotation.Root({ 8 | // note that none of these keys are shared with the parent graph state 9 | bar: Annotation(), 10 | baz: Annotation(), 11 | }); 12 | 13 | // Define subgraph 14 | const subgraphNode = async (state) => { 15 | return { bar: state.bar + 'baz' }; 16 | }; 17 | 18 | const subgraph = new 
StateGraph(SubgraphStateAnnotation) 19 | .addNode('subgraph', subgraphNode) 20 | .addEdge(START, 'subgraph') 21 | // Additional subgraph setup would go here 22 | .compile(); 23 | 24 | // Define parent graph 25 | const subgraphWrapperNode = async (state) => { 26 | // transform the state to the subgraph state 27 | const response = await subgraph.invoke({ 28 | bar: state.foo, 29 | }); 30 | // transform response back to the parent state 31 | return { 32 | foo: response.bar, 33 | }; 34 | }; 35 | 36 | const parentGraph = new StateGraph(StateAnnotation) 37 | .addNode('subgraph', subgraphWrapperNode) 38 | .addEdge(START, 'subgraph') 39 | // Additional parent graph setup would go here 40 | .compile(); 41 | 42 | // Example usage 43 | 44 | const initialState = { foo: 'hello' }; 45 | const result = await parentGraph.invoke(initialState); 46 | console.log(`Result: ${JSON.stringify(result)}`); // Should transform foo->bar, append "baz", then transform bar->foo 47 | -------------------------------------------------------------------------------- /ch7/js/d-supervisor.js: -------------------------------------------------------------------------------- 1 | import { ChatOpenAI } from "@langchain/openai"; 2 | import { 3 | StateGraph, 4 | Annotation, 5 | MessagesAnnotation, 6 | START, 7 | END, 8 | } from "@langchain/langgraph"; 9 | import { z } from "zod"; 10 | 11 | // Define decision schema 12 | const SupervisorDecision = z.object({ 13 | next: z.enum(["researcher", "coder", "FINISH"]), 14 | }); 15 | 16 | // Initialize model 17 | const model = new ChatOpenAI({ model: "gpt-4", temperature: 0 }); 18 | const modelWithStructuredOutput = 19 | model.withStructuredOutput(SupervisorDecision); 20 | 21 | // Define available agents 22 | const agents = ["researcher", "coder"]; 23 | 24 | // Define system prompts 25 | const systemPromptPart1 = `You are a supervisor tasked with managing a conversation between the following workers: ${agents.join( 26 | ", ", 27 | )}. Given the following user request, respond with the worker to act next. Each worker will perform a task and respond with their results and status. When finished, respond with FINISH.`; 28 | 29 | const systemPromptPart2 = `Given the conversation above, who should act next? Or should we FINISH? Select one of: ${agents.join( 30 | ", ", 31 | )}, FINISH`; 32 | 33 | // Define supervisor 34 | const supervisor = async (state) => { 35 | const messages = [ 36 | { role: "system", content: systemPromptPart1 }, 37 | ...state.messages, 38 | { role: "system", content: systemPromptPart2 }, 39 | ]; 40 | 41 | return await modelWithStructuredOutput.invoke(messages); 42 | }; 43 | 44 | // Define state type 45 | const StateAnnotation = Annotation.Root({ 46 | ...MessagesAnnotation.spec, 47 | next: Annotation(), // "researcher" | "coder" | "FINISH" 48 | }); 49 | 50 | // Define agent functions 51 | const researcher = async (state) => { 52 | const response = await model.invoke([ 53 | { 54 | role: "system", 55 | content: 56 | "You are a research assistant. Analyze the request and provide relevant information.", 57 | }, 58 | state.messages[0], 59 | ]); 60 | return { messages: [response] }; 61 | }; 62 | 63 | const coder = async (state) => { 64 | const response = await model.invoke([ 65 | { 66 | role: "system", 67 | content: 68 | "You are a coding assistant. 
Implement the requested functionality.", 69 | }, 70 | state.messages[0], 71 | ]); 72 | return { messages: [response] }; 73 | }; 74 | 75 | // Build the graph 76 | const graph = new StateGraph(StateAnnotation) 77 | .addNode("supervisor", supervisor) 78 | .addNode("researcher", researcher) 79 | .addNode("coder", coder) 80 | .addEdge(START, "supervisor") 81 | // Route to one of the agents or exit based on the supervisor's decision 82 | .addConditionalEdges("supervisor", async (state) => 83 | state.next === "FINISH" ? END : state.next, 84 | ) 85 | .addEdge("researcher", "supervisor") 86 | .addEdge("coder", "supervisor") 87 | .compile(); 88 | 89 | // Example usage 90 | 91 | const initialState = { 92 | messages: [ 93 | { 94 | role: "user", 95 | content: "I need help analyzing some data and creating a visualization.", 96 | }, 97 | ], 98 | next: "supervisor", 99 | }; 100 | 101 | for await (const output of graph.stream(initialState)) { 102 | console.log(`\nStep decision: ${output.next || "N/A"}`); 103 | if (output.messages) { 104 | console.log( 105 | `Response: ${output.messages[output.messages.length - 1].content.slice( 106 | 0, 107 | 100, 108 | )}...`, 109 | ); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /ch7/py/a-reflection.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, TypedDict 2 | 3 | from langchain_core.messages import ( 4 | AIMessage, 5 | BaseMessage, 6 | HumanMessage, 7 | SystemMessage, 8 | ) 9 | from langchain_openai import ChatOpenAI 10 | from langgraph.graph import END, START, StateGraph 11 | from langgraph.graph.message import add_messages 12 | 13 | # Initialize chat model 14 | model = ChatOpenAI() 15 | 16 | 17 | # Define state type 18 | class State(TypedDict): 19 | messages: Annotated[list[BaseMessage], add_messages] 20 | 21 | 22 | # Define prompts 23 | generate_prompt = SystemMessage( 24 | "You are an essay assistant tasked with writing excellent 3-paragraph essays." 25 | " Generate the best essay possible for the user's request." 26 | " If the user provides critique, respond with a revised version of your previous attempts." 27 | ) 28 | 29 | reflection_prompt = SystemMessage( 30 | "You are a teacher grading an essay submission. Generate critique and recommendations for the user's submission." 31 | " Provide detailed recommendations, including requests for length, depth, style, etc." 32 | ) 33 | 34 | 35 | def generate(state: State) -> State: 36 | answer = model.invoke([generate_prompt] + state["messages"]) 37 | return {"messages": [answer]} 38 | 39 | 40 | def reflect(state: State) -> State: 41 | # Invert the messages to get the LLM to reflect on its own output 42 | cls_map = {AIMessage: HumanMessage, HumanMessage: AIMessage} 43 | # First message is the original user request. 
We hold it the same for all nodes 44 | translated = [reflection_prompt, state["messages"][0]] + [ 45 | cls_map[msg.__class__](content=msg.content) for msg in state["messages"][1:] 46 | ] 47 | answer = model.invoke(translated) 48 | # We treat the output of this as human feedback for the generator 49 | return {"messages": [HumanMessage(content=answer.content)]} 50 | 51 | 52 | def should_continue(state: State): 53 | if len(state["messages"]) > 6: 54 | # End after 3 iterations, each with 2 messages 55 | return END 56 | else: 57 | return "reflect" 58 | 59 | 60 | # Build the graph 61 | builder = StateGraph(State) 62 | builder.add_node("generate", generate) 63 | builder.add_node("reflect", reflect) 64 | builder.add_edge(START, "generate") 65 | builder.add_conditional_edges("generate", should_continue) 66 | builder.add_edge("reflect", "generate") 67 | 68 | graph = builder.compile() 69 | 70 | # Example usage 71 | initial_state = { 72 | "messages": [ 73 | HumanMessage( 74 | content="Write an essay about the relevance of 'The Little Prince' today." 75 | ) 76 | ] 77 | } 78 | 79 | # Run the graph 80 | for output in graph.stream(initial_state): 81 | message_type = "generate" if "generate" in output else "reflect" 82 | print("\nNew message:", output[message_type] 83 | ["messages"][-1].content[:100], "...") 84 | -------------------------------------------------------------------------------- /ch7/py/b-subgraph-direct.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | 3 | from langgraph.graph import START, StateGraph 4 | 5 | 6 | # Define the state types for parent and subgraph 7 | class State(TypedDict): 8 | foo: str # this key is shared with the subgraph 9 | 10 | 11 | class SubgraphState(TypedDict): 12 | foo: str # this key is shared with the parent graph 13 | bar: str 14 | 15 | 16 | # Define subgraph 17 | def subgraph_node(state: SubgraphState): 18 | # note that this subgraph node can communicate with the parent graph via the shared "foo" key 19 | return {"foo": state["foo"] + "bar"} 20 | 21 | 22 | subgraph_builder = StateGraph(SubgraphState) 23 | subgraph_builder.add_node("subgraph_node", subgraph_node) 24 | subgraph_builder.add_edge(START, "subgraph_node") 25 | # Additional subgraph setup would go here 26 | subgraph = subgraph_builder.compile() 27 | 28 | # Define parent graph 29 | builder = StateGraph(State) 30 | builder.add_node("subgraph", subgraph) 31 | builder.add_edge(START, "subgraph") 32 | # Additional parent graph setup would go here 33 | graph = builder.compile() 34 | 35 | # Example usage 36 | initial_state = {"foo": "hello"} 37 | result = graph.invoke(initial_state) 38 | print(f"Result: {result}") # Should append "bar" to the foo value 39 | -------------------------------------------------------------------------------- /ch7/py/c-subgraph-function.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict 2 | from langgraph.graph import START, StateGraph 3 | 4 | 5 | class State(TypedDict): 6 | foo: str 7 | 8 | 9 | class SubgraphState(TypedDict): 10 | # none of these keys are shared with the parent graph state 11 | bar: str 12 | baz: str 13 | 14 | 15 | # Define subgraph 16 | def subgraph_node(state: SubgraphState): 17 | return {"bar": state["bar"] + "baz"} 18 | 19 | 20 | subgraph_builder = StateGraph(SubgraphState) 21 | subgraph_builder.add_node("subgraph_node", subgraph_node) 22 | subgraph_builder.add_edge(START, "subgraph_node") 23 | # Additional subgraph setup would go here 24 | subgraph = subgraph_builder.compile() 
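# Because SubgraphState shares no keys with the parent State, the compiled
# subgraph cannot be attached as a node directly (contrast with
# b-subgraph-direct.py); the wrapper node below translates between the schemas.
# Conceptually: parent {"foo": "hello"} -> subgraph {"bar": "hello"} ->
# subgraph returns {"bar": "hellobaz"} -> parent state becomes {"foo": "hellobaz"}.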
25 | 26 | 27 | # Define parent graph node that invokes subgraph 28 | def node(state: State): 29 | # transform the state to the subgraph state 30 | response = subgraph.invoke({"bar": state["foo"]}) 31 | # transform response back to the parent state 32 | return {"foo": response["bar"]} 33 | 34 | 35 | builder = StateGraph(State) 36 | # note that we are using `node` function instead of a compiled subgraph 37 | builder.add_node("node", node) 38 | builder.add_edge(START, "node") 39 | # Additional parent graph setup would go here 40 | graph = builder.compile() 41 | 42 | # Example usage 43 | initial_state = {"foo": "hello"} 44 | result = graph.invoke(initial_state) 45 | print( 46 | f"Result: {result}" 47 | ) # Should transform foo->bar, append "baz", then transform bar->foo 48 | -------------------------------------------------------------------------------- /ch7/py/d-supervisor.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from langchain_openai import ChatOpenAI 4 | from langgraph.graph import StateGraph, MessagesState, START 5 | from pydantic import BaseModel 6 | 7 | 8 | class SupervisorDecision(BaseModel): 9 | next: Literal["researcher", "coder", "FINISH"] 10 | 11 | 12 | # Initialize model 13 | model = ChatOpenAI(model="gpt-4", temperature=0) 14 | model = model.with_structured_output(SupervisorDecision) 15 | 16 | # Define available agents 17 | agents = ["researcher", "coder"] 18 | 19 | # Define system prompts 20 | system_prompt_part_1 = f"""You are a supervisor tasked with managing a conversation between the 21 | following workers: {agents}. Given the following user request, 22 | respond with the worker to act next. Each worker will perform a 23 | task and respond with their results and status. When finished, 24 | respond with FINISH.""" 25 | 26 | system_prompt_part_2 = f"""Given the conversation above, who should act next? Or should we FINISH? Select one of: {", ".join(agents)}, FINISH""" 27 | 28 | 29 | def supervisor(state): 30 | messages = [ 31 | ("system", system_prompt_part_1), 32 | *state["messages"], 33 | ("system", system_prompt_part_2), 34 | ] 35 | return model.invoke(messages) 36 | 37 | 38 | # Define agent state 39 | class AgentState(MessagesState): 40 | next: Literal["researcher", "coder", "FINISH"] 41 | 42 | 43 | # Define agent functions 44 | def researcher(state: AgentState): 45 | # In a real implementation, this would do research tasks 46 | response = model.invoke( 47 | [ 48 | { 49 | "role": "system", 50 | "content": "You are a research assistant. Analyze the request and provide relevant information.", 51 | }, 52 | {"role": "user", "content": state["messages"][0].content}, 53 | ] 54 | ) 55 | return {"messages": [response]} 56 | 57 | 58 | def coder(state: AgentState): 59 | # In a real implementation, this would write code 60 | response = model.invoke( 61 | [ 62 | { 63 | "role": "system", 64 | "content": "You are a coding assistant. 
Implement the requested functionality.", 65 | }, 66 | {"role": "user", "content": state["messages"][0].content}, 67 | ] 68 | ) 69 | return {"messages": [response]} 70 | 71 | 72 | # Build the graph 73 | builder = StateGraph(AgentState) 74 | builder.add_node("supervisor", supervisor) 75 | builder.add_node("researcher", researcher) 76 | builder.add_node("coder", coder) 77 | 78 | builder.add_edge(START, "supervisor") 79 | # Route to one of the agents or exit based on the supervisor's decision 80 | builder.add_conditional_edges("supervisor", lambda state: state["next"]) 81 | builder.add_edge("researcher", "supervisor") 82 | builder.add_edge("coder", "supervisor") 83 | 84 | graph = builder.compile() 85 | 86 | # Example usage 87 | initial_state = { 88 | "messages": [ 89 | { 90 | "role": "user", 91 | "content": "I need help analyzing some data and creating a visualization.", 92 | } 93 | ], 94 | "next": "supervisor", 95 | } 96 | 97 | for output in graph.stream(initial_state): 98 | print(f"\nStep decision: {output.get('next', 'N/A')}") 99 | if output.get("messages"): 100 | print(f"Response: {output['messages'][-1].content[:100]}...") 101 | -------------------------------------------------------------------------------- /ch8/js/a-structured-output.js: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | import { ChatOpenAI } from "@langchain/openai"; 3 | 4 | const joke = z.object({ 5 | setup: z.string().describe("The setup of the joke"), 6 | punchline: z.string().describe("The punchline to the joke"), 7 | }); 8 | 9 | let model = new ChatOpenAI({ 10 | model: "gpt-3.5-turbo-0125", 11 | temperature: 0, 12 | }); 13 | 14 | model = model.withStructuredOutput(joke); 15 | 16 | const result = await model.invoke("Tell me a joke about cats"); 17 | console.log(result); 18 | -------------------------------------------------------------------------------- /ch8/js/b-streaming-output.js: -------------------------------------------------------------------------------- 1 | import { HumanMessage } from "@langchain/core/messages"; 2 | 3 | // Assuming graph is already created and configured 4 | const graph = new StateGraph().compile(); 5 | 6 | const input = { 7 | messages: [ 8 | new HumanMessage( 9 | "How old was the 30th president of the United States when he died?", 10 | ), 11 | ], 12 | }; 13 | 14 | const config = { configurable: { thread_id: "1" } }; 15 | 16 | // Assuming graph is already created and configured 17 | const output = await graph.stream(input, config); 18 | 19 | for await (const chunk of output) { 20 | console.log(chunk); 21 | } 22 | -------------------------------------------------------------------------------- /ch8/js/c-interrupt.js: -------------------------------------------------------------------------------- 1 | import { HumanMessage } from "@langchain/core/messages"; 2 | import { MemorySaver } from "@langchain/langgraph"; 3 | 4 | const controller = new AbortController(); 5 | 6 | const input = { 7 | messages: [ 8 | new HumanMessage( 9 | "How old was the 30th president of the United States when he died?", 10 | ), 11 | ], 12 | }; 13 | 14 | const config = { configurable: { thread_id: "1" } }; 15 | 16 | // Assuming graph is already created and configured 17 | const graph = new StateGraph().compile({ checkpointer: new MemorySaver() }); 18 | 19 | // Simulate interruption after 2 seconds 20 | setTimeout(() => { 21 | controller.abort(); 22 | }, 2000); 23 | 24 | try { 25 | const output = await graph.stream(input, { 26 | ...config, 27 | signal: 
controller.signal, 28 | }); 29 | 30 | for await (const chunk of output) { 31 | console.log(chunk); // do something with the output 32 | } 33 | } catch (e) { 34 | console.log(e); 35 | } 36 | -------------------------------------------------------------------------------- /ch8/js/d-authorize.js: -------------------------------------------------------------------------------- 1 | import { HumanMessage } from "@langchain/core/messages"; 2 | import { MemorySaver } from "@langchain/langgraph"; 3 | 4 | // Assuming graph is already created and configured 5 | const graph = new StateGraph().compile({ checkpointer: new MemorySaver() }); 6 | 7 | const input = { 8 | messages: [ 9 | new HumanMessage( 10 | "How old was the 30th president of the United States when he died?", 11 | ), 12 | ], 13 | }; 14 | 15 | const config = { configurable: { thread_id: "1" } }; 16 | 17 | const output = await graph.stream(input, { 18 | ...config, 19 | interruptBefore: ["tools"], 20 | }); 21 | 22 | for await (const chunk of output) { 23 | console.log(chunk); // do something with the output 24 | } 25 | -------------------------------------------------------------------------------- /ch8/js/e-resume.js: -------------------------------------------------------------------------------- 1 | import { MemorySaver } from "@langchain/langgraph"; 2 | 3 | // Assuming graph is already created and configured 4 | const graph = new StateGraph().compile({ checkpointer: new MemorySaver() }); 5 | 6 | const config = { configurable: { thread_id: "1" } }; 7 | 8 | const output = await graph.stream(null, { 9 | ...config, 10 | interruptBefore: ["tools"], 11 | }); 12 | 13 | for await (const chunk of output) { 14 | console.log(chunk); // do something with the output 15 | } 16 | -------------------------------------------------------------------------------- /ch8/js/f-edit-state.js: -------------------------------------------------------------------------------- 1 | import { MemorySaver } from "@langchain/langgraph"; 2 | 3 | // Assuming graph is already created and configured 4 | const graph = new StateGraph().compile({ checkpointer: new MemorySaver() }); 5 | 6 | const config = { configurable: { thread_id: "1" } }; 7 | 8 | const state = await graph.getState(config); 9 | console.log("Current state:", state); 10 | 11 | // something you want to add or replace 12 | const update = {}; 13 | 14 | await graph.updateState(config, update); 15 | console.log("State updated"); 16 | -------------------------------------------------------------------------------- /ch8/js/g-fork.js: -------------------------------------------------------------------------------- 1 | import { MemorySaver } from "@langchain/langgraph"; 2 | 3 | // Assuming graph is already created and configured 4 | const graph = new StateGraph().compile({ checkpointer: new MemorySaver() }); 5 | 6 | const config = { configurable: { thread_id: "1" } }; 7 | 8 | const history = await Array.fromAsync(graph.getStateHistory(config)); 9 | console.log("History states:", history.length); 10 | 11 | // replay a past state 12 | if (history.length >= 3) { 13 | const result = await graph.invoke(null, history[2].config); 14 | console.log("Replayed state result:", result); 15 | } 16 | -------------------------------------------------------------------------------- /ch8/py/a-structured-output.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | from langchain_openai import ChatOpenAI 3 | 4 | 5 | class Joke(BaseModel): 6 | setup: str = 
Field(description="The setup of the joke") 7 | punchline: str = Field(description="The punchline to the joke") 8 | 9 | 10 | model = ChatOpenAI(model="gpt-4o", temperature=0) 11 | model = model.with_structured_output(Joke) 12 | 13 | result = model.invoke("Tell me a joke about cats") 14 | print(result) 15 | -------------------------------------------------------------------------------- /ch8/py/b-streaming-output.py: -------------------------------------------------------------------------------- 1 | from langchain_core.messages import HumanMessage 2 | from langgraph.graph import StateGraph 3 | 4 | 5 | def create_simple_graph(): 6 | # Create a simple graph for demonstration 7 | builder = StateGraph() 8 | # Add nodes and edges as needed 9 | return builder.compile() 10 | 11 | 12 | graph = create_simple_graph() 13 | 14 | input = { 15 | "messages": [ 16 | HumanMessage( 17 | "How old was the 30th president of the United States when he died?" 18 | ) 19 | ] 20 | } 21 | 22 | for c in graph.stream(input, stream_mode="updates"): 23 | print(c) 24 | -------------------------------------------------------------------------------- /ch8/py/c-interrupt.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from contextlib import aclosing 3 | 4 | from langchain.schema import HumanMessage 5 | from langgraph.graph import StateGraph 6 | from langgraph.checkpoint.memory import MemorySaver 7 | 8 | 9 | async def main(): 10 | # Create a simple graph 11 | builder = StateGraph() 12 | # Add nodes and edges as needed 13 | graph = builder.compile(checkpointer=MemorySaver()) 14 | 15 | event = asyncio.Event() 16 | 17 | input = { 18 | "messages": [ 19 | HumanMessage( 20 | "How old was the 30th president of the United States when he died?" 21 | ) 22 | ] 23 | } 24 | 25 | config = {"configurable": {"thread_id": "1"}} 26 | 27 | async with aclosing(graph.astream(input, config)) as stream: 28 | async for chunk in stream: 29 | if event.is_set(): 30 | break 31 | else: 32 | print(chunk) # do something with the output 33 | 34 | # Simulate interruption after 2 seconds 35 | await asyncio.sleep(2) 36 | event.set() 37 | 38 | 39 | if __name__ == "__main__": 40 | asyncio.run(main()) 41 | -------------------------------------------------------------------------------- /ch8/py/d-authorize.py: -------------------------------------------------------------------------------- 1 | from langchain.schema import HumanMessage 2 | from langgraph.graph import StateGraph 3 | from langgraph.checkpoint.memory import MemorySaver 4 | 5 | 6 | async def main(): 7 | # Create a simple graph 8 | builder = StateGraph() 9 | # Add nodes and edges as needed 10 | graph = builder.compile(checkpointer=MemorySaver()) 11 | 12 | input = { 13 | "messages": [ 14 | HumanMessage( 15 | "How old was the 30th president of the United States when he died?" 
16 | ) 17 | ] 18 | } 19 | 20 | config = {"configurable": {"thread_id": "1"}} 21 | 22 | output = graph.astream(input, config, interrupt_before=["tools"]) 23 | 24 | async for c in output: 25 | print(c) # do something with the output 26 | 27 | 28 | if __name__ == "__main__": 29 | import asyncio 30 | 31 | asyncio.run(main()) 32 | -------------------------------------------------------------------------------- /ch8/py/e-resume.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import StateGraph 2 | from langgraph.checkpoint.memory import MemorySaver 3 | 4 | 5 | async def main(): 6 | # Create a simple graph 7 | builder = StateGraph() 8 | # Add nodes and edges as needed 9 | graph = builder.compile(checkpointer=MemorySaver()) 10 | 11 | config = {"configurable": {"thread_id": "1"}} 12 | 13 | output = graph.astream(None, config, interrupt_before=["tools"]) 14 | 15 | async for c in output: 16 | print(c) # do something with the output 17 | 18 | 19 | if __name__ == "__main__": 20 | import asyncio 21 | 22 | asyncio.run(main()) 23 | -------------------------------------------------------------------------------- /ch8/py/f-edit-state.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import StateGraph 2 | from langgraph.checkpoint.memory import MemorySaver 3 | 4 | 5 | def main(): 6 | # Create a simple graph 7 | builder = StateGraph() 8 | # Add nodes and edges as needed 9 | graph = builder.compile(checkpointer=MemorySaver()) 10 | 11 | config = {"configurable": {"thread_id": "1"}} 12 | 13 | state = graph.get_state(config) 14 | print("Current state:", state) 15 | 16 | # something you want to add or replace 17 | update = {} 18 | 19 | graph.update_state(config, update) 20 | print("State updated") 21 | 22 | 23 | if __name__ == "__main__": 24 | main() 25 | -------------------------------------------------------------------------------- /ch8/py/g-fork.py: -------------------------------------------------------------------------------- 1 | from langgraph.graph import StateGraph 2 | from langgraph.checkpoint.memory import MemorySaver 3 | 4 | 5 | def main(): 6 | # Create a simple graph 7 | builder = StateGraph() 8 | # Add nodes and edges as needed 9 | graph = builder.compile(checkpointer=MemorySaver()) 10 | 11 | config = {"configurable": {"thread_id": "1"}} 12 | 13 | history = [state for state in graph.get_state_history(config)] 14 | 15 | print("History states:", len(history)) 16 | 17 | # replay a past state 18 | if len(history) >= 3: 19 | result = graph.invoke(None, history[2].config) 20 | print("Replayed state result:", result) 21 | 22 | 23 | if __name__ == "__main__": 24 | main() 25 | -------------------------------------------------------------------------------- /ch9/js/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # LangGraph API 3 | .langgraph_api 4 | -------------------------------------------------------------------------------- /ch9/js/demo.ts: -------------------------------------------------------------------------------- 1 | // demo of the compiled graph running using the sdk 2 | import { Client } from '@langchain/langgraph-sdk'; 3 | import { graph } from './src/retrieval_graph/graph.js'; 4 | import dotenv from 'dotenv'; 5 | 6 | // Load environment variables from .env file 7 | dotenv.config(); 8 | 9 | // Environment variables needed: 10 | // LANGGRAPH_API_URL: The URL where your LangGraph server is running 11 | // - For local 
development: http://localhost:2024 (or your local server port) 12 | // - For LangSmith cloud: https://api.smith.langchain.com 13 | // 14 | 15 | const assistant_id = 'retrieval_graph'; 16 | async function runDemo() { 17 | // Initialize the LangGraph client 18 | const client = new Client({ 19 | apiUrl: process.env.LANGGRAPH_API_URL || 'http://localhost:2024', 20 | }); 21 | 22 | // Create a new thread for this conversation 23 | console.log('Creating new thread...'); 24 | const thread = await client.threads.create({ 25 | metadata: { 26 | demo: 'retrieval-graph', 27 | }, 28 | }); 29 | console.log('Thread created with ID:', thread.thread_id); 30 | 31 | // Example question 32 | const question = 'What is this document about?'; 33 | 34 | console.log('\n=== Streaming Example ==='); 35 | console.log('Question:', question); 36 | 37 | // Run the graph with streaming 38 | try { 39 | console.log('\nStarting stream...'); 40 | const stream = await client.runs.stream(thread.thread_id, assistant_id, { 41 | input: { query: question }, 42 | streamMode: ['values', 'messages', 'updates'], // Include all stream types 43 | }); 44 | 45 | // Process the stream chunks 46 | console.log('\nWaiting for stream chunks...'); 47 | for await (const chunk of stream) { 48 | console.log('\nReceived chunk:'); 49 | console.log('Event type:', chunk.event); 50 | if (chunk.event === 'values') { 51 | console.log('Values data:', JSON.stringify(chunk.data, null, 2)); 52 | } else if (chunk.event === 'messages/partial') { 53 | console.log('Messages data:', JSON.stringify(chunk, null, 2)); 54 | } else if (chunk.event === 'updates') { 55 | console.log('Update data:', JSON.stringify(chunk.data, null, 2)); 56 | } 57 | } 58 | console.log('\nStream completed.'); 59 | } catch (error) { 60 | console.error('Error in streaming run:', error); 61 | // Log more details about the error 62 | if (error instanceof Error) { 63 | console.error('Error message:', error.message); 64 | console.error('Error stack:', error.stack); 65 | } 66 | } 67 | } 68 | 69 | // Run the demo 70 | runDemo().catch((error) => { 71 | console.error('Fatal error:', error); 72 | process.exit(1); 73 | }); 74 | -------------------------------------------------------------------------------- /ch9/js/langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "node_version": "20", 3 | "graphs": { 4 | "ingestion_graph": "./src/ingestion_graph/graph.ts:graph", 5 | "retrieval_graph": "./src/retrieval_graph/graph.ts:graph" 6 | }, 7 | "env": "../../.env", 8 | "dependencies": ["."] 9 | } -------------------------------------------------------------------------------- /ch9/js/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "module", 3 | "dependencies": { 4 | "@langchain/community": "^0.3.26", 5 | "@langchain/core": "^0.3.33", 6 | "@langchain/langgraph": "^0.2.41", 7 | "@langchain/openai": "^0.3.17", 8 | "@supabase/supabase-js": "^2.44.0", 9 | "langchain": "^0.3.12", 10 | "pdf-parse": "^1.1.1", 11 | "chromadb": "^1.10.4" 12 | }, 13 | "devDependencies": { 14 | "@types/node": "^20.0.0", 15 | "typescript": "^5.0.0", 16 | "@types/pdf-parse": "^1.1.4" 17 | } 18 | } -------------------------------------------------------------------------------- /ch9/js/src/ingestion_graph/configuration.ts: -------------------------------------------------------------------------------- 1 | import { Annotation } from '@langchain/langgraph'; 2 | import { RunnableConfig } from '@langchain/core/runnables'; 3 | 
4 | // This path points to the directory containing the documents to index. 5 | const DEFAULT_DOCS_PATH = 'src/sample_docs.json'; 6 | 7 | /** 8 | * The configuration for the indexing process. 9 | */ 10 | export const IndexConfigurationAnnotation = Annotation.Root({ 11 | /** 12 | * Path to folder containing default documents to index. 13 | */ 14 | docsPath: Annotation<string>, 15 | 16 | /** 17 | * Name of the openai embedding model to use. Must be a valid embedding model name. 18 | */ 19 | embeddingModel: Annotation<'text-embedding-3-small'>, 20 | 21 | /** 22 | * The vector store provider to store the embeddings. 23 | * Options are 'supabase', 'chroma'. 24 | */ 25 | retrieverProvider: Annotation<'supabase' | 'chroma'>, 26 | 27 | /** 28 | * Whether to index sample documents specified in the docsPath. 29 | */ 30 | useSampleDocs: Annotation<boolean>, 31 | }); 32 | 33 | /** 34 | * Create a typeof IndexConfigurationAnnotation.State instance from a RunnableConfig object. 35 | * 36 | * @param config - The configuration object to use. 37 | * @returns An instance of typeof IndexConfigurationAnnotation.State with the specified configuration. 38 | */ 39 | export function ensureIndexConfiguration( 40 | config: RunnableConfig 41 | ): typeof IndexConfigurationAnnotation.State { 42 | const configurable = (config?.configurable || {}) as Partial< 43 | typeof IndexConfigurationAnnotation.State 44 | >; 45 | return { 46 | docsPath: configurable.docsPath || DEFAULT_DOCS_PATH, 47 | embeddingModel: configurable.embeddingModel || 'text-embedding-3-small', 48 | retrieverProvider: configurable.retrieverProvider || 'supabase', 49 | useSampleDocs: configurable.useSampleDocs || false, 50 | }; 51 | } 52 | -------------------------------------------------------------------------------- /ch9/js/src/ingestion_graph/graph.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * This "graph" simply exposes an endpoint for a user to upload docs to be indexed.
3 | */ 4 | import path from 'path'; 5 | import fs from 'fs/promises'; 6 | import { RunnableConfig } from '@langchain/core/runnables'; 7 | import { StateGraph, END, START } from '@langchain/langgraph'; 8 | import { IndexStateAnnotation } from './state.js'; 9 | import { DirectoryLoader } from 'langchain/document_loaders/fs/directory'; 10 | import { 11 | ensureIndexConfiguration, 12 | IndexConfigurationAnnotation, 13 | } from './configuration.js'; 14 | import { makeRetriever } from '../shared/retrieval.js'; 15 | import { reduceDocs } from '../shared/state.js'; 16 | 17 | async function ingestDocs( 18 | state: typeof IndexStateAnnotation.State, 19 | config?: RunnableConfig 20 | ): Promise { 21 | if (!config) { 22 | throw new Error('Configuration required to run index_docs.'); 23 | } 24 | 25 | const configuration = ensureIndexConfiguration(config); 26 | let docs = state.docs; 27 | 28 | if (!docs || docs.length === 0) { 29 | if (configuration.useSampleDocs) { 30 | const fileContent = await fs.readFile(configuration.docsPath, 'utf-8'); 31 | const serializedDocs = JSON.parse(fileContent); 32 | docs = reduceDocs([], serializedDocs); 33 | } else { 34 | throw new Error('No sample documents to index.'); 35 | } 36 | } else { 37 | docs = reduceDocs([], docs); 38 | } 39 | 40 | const retriever = await makeRetriever(config); 41 | await retriever.addDocuments(docs); 42 | 43 | return { docs: 'delete' }; 44 | } 45 | 46 | // Define the graph 47 | const builder = new StateGraph( 48 | IndexStateAnnotation, 49 | IndexConfigurationAnnotation 50 | ) 51 | .addNode('ingestDocs', ingestDocs) 52 | .addEdge(START, 'ingestDocs') 53 | .addEdge('ingestDocs', END); 54 | 55 | // Compile into a graph object that you can invoke and deploy. 56 | export const graph = builder 57 | .compile() 58 | .withConfig({ runName: 'IngestionGraph' }); 59 | -------------------------------------------------------------------------------- /ch9/js/src/ingestion_graph/state.ts: -------------------------------------------------------------------------------- 1 | import { Annotation } from '@langchain/langgraph'; 2 | import { Document } from '@langchain/core/documents'; 3 | import { reduceDocs } from '../shared/state.js'; 4 | 5 | /** 6 | * Represents the state for document indexing and retrieval. 7 | * 8 | * This interface defines the structure of the index state, which includes 9 | * the documents to be indexed and the retriever used for searching 10 | * these documents. 11 | */ 12 | export const IndexStateAnnotation = Annotation.Root({ 13 | /** 14 | * A list of documents that the agent can index. 15 | */ 16 | docs: Annotation< 17 | Document[], 18 | Document[] | { [key: string]: any }[] | string[] | string | 'delete' 19 | >({ 20 | default: () => [], 21 | reducer: reduceDocs, 22 | }), 23 | }); 24 | 25 | export type IndexStateType = typeof IndexStateAnnotation.State; 26 | -------------------------------------------------------------------------------- /ch9/js/src/retrieval_graph/configuration.ts: -------------------------------------------------------------------------------- 1 | import { Annotation } from '@langchain/langgraph'; 2 | import { 3 | BaseConfigurationAnnotation, 4 | ensureBaseConfiguration, 5 | } from '../shared/configuration.js'; 6 | import { RunnableConfig } from '@langchain/core/runnables'; 7 | 8 | export const AgentConfigurationAnnotation = Annotation.Root({ 9 | ...BaseConfigurationAnnotation.spec, 10 | 11 | // models 12 | /** 13 | * The language model used for processing and refining queries. 
14 | * Should be in the form: provider/model-name. 15 | */ 16 | queryModel: Annotation<string>, 17 | }); 18 | 19 | /** 20 | * Create a typeof ConfigurationAnnotation.State instance from a RunnableConfig object. 21 | * 22 | * @param config - The configuration object to use. 23 | * @returns An instance of typeof ConfigurationAnnotation.State with the specified configuration. 24 | */ 25 | export function ensureAgentConfiguration( 26 | config: RunnableConfig 27 | ): typeof AgentConfigurationAnnotation.State { 28 | const configurable = (config?.configurable || {}) as Partial< 29 | typeof AgentConfigurationAnnotation.State 30 | >; 31 | const baseConfig = ensureBaseConfiguration(config); 32 | return { 33 | ...baseConfig, 34 | queryModel: configurable.queryModel || 'openai/gpt-4o', 35 | }; 36 | } 37 | -------------------------------------------------------------------------------- /ch9/js/src/retrieval_graph/state.ts: -------------------------------------------------------------------------------- 1 | import { Annotation, MessagesAnnotation } from '@langchain/langgraph'; 2 | import { reduceDocs } from '../shared/state.js'; 3 | import { Document } from '@langchain/core/documents'; 4 | /** 5 | * Represents the state of the retrieval graph / agent. 6 | */ 7 | export const AgentStateAnnotation = Annotation.Root({ 8 | query: Annotation<string>(), 9 | route: Annotation<string>(), 10 | ...MessagesAnnotation.spec, 11 | 12 | /** 13 | * Populated by the retriever. This is a list of documents that the agent can reference. 14 | * @type {Document[]} 15 | */ 16 | documents: Annotation< 17 | Document[], 18 | Document[] | { [key: string]: any }[] | string[] | string | 'delete' 19 | >({ 20 | default: () => [], 21 | // @ts-ignore 22 | reducer: reduceDocs, 23 | }), 24 | 25 | // Additional attributes can be added here as needed 26 | }); 27 | -------------------------------------------------------------------------------- /ch9/js/src/retrieval_graph/utils.ts: -------------------------------------------------------------------------------- 1 | import { Document } from '@langchain/core/documents'; 2 | 3 | export function formatDoc(doc: Document): string { 4 | const metadata = doc.metadata || {}; 5 | const meta = Object.entries(metadata) 6 | .map(([k, v]) => ` ${k}=${v}`) 7 | .join(''); 8 | const metaStr = meta ? ` ${meta}` : ''; 9 | 10 | return `<document${metaStr}>\n${doc.pageContent}\n</document>`; 11 | } 12 | 13 | export function formatDocs(docs?: Document[]): string { 14 | /**Format a list of documents as XML. */ 15 | if (!docs || docs.length === 0) { 16 | return '<documents></documents>'; 17 | } 18 | const formatted = docs.map(formatDoc).join('\n'); 19 | return `<documents>\n${formatted}\n</documents>`; 20 | } 21 | -------------------------------------------------------------------------------- /ch9/js/src/shared/configuration.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Define the configurable parameters for the agent. 3 | */ 4 | 5 | import { Annotation } from '@langchain/langgraph'; 6 | import { RunnableConfig } from '@langchain/core/runnables'; 7 | 8 | /** 9 | * typeof ConfigurationAnnotation.State class for indexing and retrieval operations. 10 | * 11 | * @property embeddingModel - The name of the openai embedding model to use. 12 | * @property retrieverProvider - The vector store provider to use for retrieval. 13 | * @property filter - Optional filter criteria to limit the items retrieved based on the specified filter type. 14 | * @property k - The number of results to return from the retriever.
15 | */ 16 | 17 | export const BaseConfigurationAnnotation = Annotation.Root({ 18 | /** 19 | * Name of the openai embedding model to use. Must be a valid embedding model name. 20 | */ 21 | embeddingModel: Annotation<'text-embedding-3-small'>, 22 | 23 | /** 24 | * The vector store provider to use for retrieval. 25 | * Options are 'supabase', 'chroma'. 26 | */ 27 | retrieverProvider: Annotation<'supabase' | 'chroma'>, 28 | 29 | /** 30 | * Optional filter criteria to limit the items retrieved. 31 | * Can be any metadata object that matches document metadata structure. 32 | */ 33 | filter: Annotation<Record<string, any> | undefined>, 34 | 35 | /** 36 | * The number of results to return from the retriever. 37 | */ 38 | k: Annotation<number>, 39 | }); 40 | 41 | /** 42 | * Create a typeof BaseConfigurationAnnotation.State instance from a RunnableConfig object. 43 | * 44 | * @param config - The configuration object to use. 45 | * @returns An instance of typeof BaseConfigurationAnnotation.State with the specified configuration. 46 | */ 47 | export function ensureBaseConfiguration( 48 | config: RunnableConfig 49 | ): typeof BaseConfigurationAnnotation.State { 50 | const configurable = (config?.configurable || {}) as Partial< 51 | typeof BaseConfigurationAnnotation.State 52 | >; 53 | return { 54 | embeddingModel: configurable.embeddingModel || 'text-embedding-3-small', 55 | retrieverProvider: configurable.retrieverProvider || 'supabase', 56 | filter: configurable.filter, 57 | k: configurable.k || 4, 58 | }; 59 | } 60 | -------------------------------------------------------------------------------- /ch9/js/src/shared/retrieval.ts: -------------------------------------------------------------------------------- 1 | import { VectorStoreRetriever } from '@langchain/core/vectorstores'; 2 | import { OpenAIEmbeddings } from '@langchain/openai'; 3 | import { SupabaseVectorStore } from '@langchain/community/vectorstores/supabase'; 4 | import { createClient } from '@supabase/supabase-js'; 5 | import { RunnableConfig } from '@langchain/core/runnables'; 6 | import { Embeddings } from '@langchain/core/embeddings'; 7 | import { ensureBaseConfiguration } from './configuration.js'; 8 | import { Chroma } from '@langchain/community/vectorstores/chroma'; 9 | 10 | export async function makeSupabaseRetriever( 11 | configuration: ReturnType<typeof ensureBaseConfiguration>, 12 | embeddingModel: Embeddings 13 | ): Promise<VectorStoreRetriever> { 14 | if (!process.env.SUPABASE_URL || !process.env.SUPABASE_SERVICE_ROLE_KEY) { 15 | throw new Error( 16 | 'SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY environment variables are not defined' 17 | ); 18 | } 19 | const supabaseClient = createClient( 20 | process.env.SUPABASE_URL ?? '', 21 | process.env.SUPABASE_SERVICE_ROLE_KEY ??
'' 22 | ); 23 | const vectorStore = new SupabaseVectorStore(embeddingModel, { 24 | client: supabaseClient, 25 | tableName: 'documents', 26 | queryName: 'match_documents', 27 | }); 28 | return vectorStore.asRetriever({ 29 | filter: configuration.filter, 30 | k: configuration.k, 31 | }); 32 | } 33 | 34 | export async function makeChromaRetriever( 35 | configuration: ReturnType, 36 | embeddingModel: Embeddings 37 | ) { 38 | const vectorStore = new Chroma(embeddingModel, { 39 | collectionName: 'documents', 40 | }); 41 | return vectorStore.asRetriever({ 42 | filter: configuration.filter, 43 | k: configuration.k, 44 | }); 45 | } 46 | 47 | export async function makeRetriever( 48 | config: RunnableConfig 49 | ): Promise { 50 | const configuration = ensureBaseConfiguration(config); 51 | const embeddingModel = new OpenAIEmbeddings({ 52 | model: configuration.embeddingModel, 53 | }); 54 | switch (configuration.retrieverProvider) { 55 | case 'supabase': 56 | return makeSupabaseRetriever(configuration, embeddingModel); 57 | case 'chroma': 58 | return makeChromaRetriever(configuration, embeddingModel); 59 | default: 60 | throw new Error( 61 | `Unrecognized retrieverProvider in configuration: ${configuration.retrieverProvider}` 62 | ); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /ch9/js/src/shared/state.ts: -------------------------------------------------------------------------------- 1 | import { Document } from '@langchain/core/documents'; 2 | import { v4 as uuidv4 } from 'uuid'; 3 | 4 | /** 5 | * Reduces the document array based on the provided new documents or actions. 6 | * 7 | * @param existing - The existing array of documents. 8 | * @param newDocs - The new documents or actions to apply. 9 | * @returns The updated array of documents. 10 | */ 11 | export function reduceDocs( 12 | existing?: Document[], 13 | newDocs?: Document[] | { [key: string]: any }[] | string[] | string | 'delete' 14 | ): Document[] { 15 | if (newDocs === 'delete') { 16 | return []; 17 | } 18 | 19 | const existingList = existing || []; 20 | const existingIds = new Set(existingList.map((doc) => doc.metadata?.uuid)); 21 | 22 | if (typeof newDocs === 'string') { 23 | const docId = uuidv4(); 24 | return [ 25 | ...existingList, 26 | { pageContent: newDocs, metadata: { uuid: docId } }, 27 | ]; 28 | } 29 | 30 | const newList: Document[] = []; 31 | if (Array.isArray(newDocs)) { 32 | for (const item of newDocs) { 33 | if (typeof item === 'string') { 34 | const itemId = uuidv4(); 35 | newList.push({ pageContent: item, metadata: { uuid: itemId } }); 36 | existingIds.add(itemId); 37 | } else if (typeof item === 'object') { 38 | const metadata = (item as Document).metadata ?? {}; 39 | let itemId = metadata.uuid ?? 
uuidv4(); 40 | 41 | if (!existingIds.has(itemId)) { 42 | if ('pageContent' in item) { 43 | // It's a Document-like object 44 | newList.push({ 45 | ...(item as Document), 46 | metadata: { ...metadata, uuid: itemId }, 47 | }); 48 | } else { 49 | // It's a generic object, treat it as metadata 50 | newList.push({ 51 | pageContent: '', 52 | metadata: { ...(item as { [key: string]: any }), uuid: itemId }, 53 | }); 54 | } 55 | existingIds.add(itemId); 56 | } 57 | } 58 | } 59 | } 60 | 61 | return [...existingList, ...newList]; 62 | } 63 | -------------------------------------------------------------------------------- /ch9/js/src/shared/utils.ts: -------------------------------------------------------------------------------- 1 | import { BaseChatModel } from '@langchain/core/language_models/chat_models'; 2 | import { initChatModel } from 'langchain/chat_models/universal'; 3 | 4 | const SUPPORTED_PROVIDERS = [ 5 | 'openai', 6 | 'anthropic', 7 | 'azure_openai', 8 | 'cohere', 9 | 'google-vertexai', 10 | 'google-vertexai-web', 11 | 'google-genai', 12 | 'ollama', 13 | 'together', 14 | 'fireworks', 15 | 'mistralai', 16 | 'groq', 17 | 'bedrock', 18 | 'cerebras', 19 | 'deepseek', 20 | 'xai', 21 | ] as const; 22 | /** 23 | * Load a chat model from a fully specified name. 24 | * @param fullySpecifiedName - String in the format 'provider/model' or 'provider/account/provider/model'. 25 | * @returns A Promise that resolves to a BaseChatModel instance. 26 | */ 27 | export async function loadChatModel( 28 | fullySpecifiedName: string, 29 | temperature: number = 0.2 30 | ): Promise { 31 | const index = fullySpecifiedName.indexOf('/'); 32 | if (index === -1) { 33 | // If there's no "/", assume it's just the model 34 | if ( 35 | !SUPPORTED_PROVIDERS.includes( 36 | fullySpecifiedName as (typeof SUPPORTED_PROVIDERS)[number] 37 | ) 38 | ) { 39 | throw new Error(`Unsupported model: ${fullySpecifiedName}`); 40 | } 41 | return await initChatModel(fullySpecifiedName, { 42 | temperature: temperature, 43 | }); 44 | } else { 45 | const provider = fullySpecifiedName.slice(0, index); 46 | const model = fullySpecifiedName.slice(index + 1); 47 | if ( 48 | !SUPPORTED_PROVIDERS.includes( 49 | provider as (typeof SUPPORTED_PROVIDERS)[number] 50 | ) 51 | ) { 52 | throw new Error(`Unsupported provider: ${provider}`); 53 | } 54 | return await initChatModel(model, { 55 | modelProvider: provider, 56 | temperature: temperature, 57 | }); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /ch9/js/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "NodeNext", 5 | "outDir": "./dist", 6 | "rootDir": "./src", 7 | "strict": false, 8 | "esModuleInterop": true, 9 | "skipLibCheck": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "resolveJsonModule": true 12 | }, 13 | "include": ["src/**/*"], 14 | "exclude": ["node_modules"] 15 | } -------------------------------------------------------------------------------- /ch9/py/demo.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from langgraph_sdk import get_client 3 | 4 | 5 | async def invoke_retrieval_assistant(): 6 | # Initialize the LangGraph client 7 | # Replace with your actual LangGraph deployment URL 8 | deployment_url = "http://localhost:2024" 9 | client = get_client(url=deployment_url) 10 | 11 | try: 12 | # Create a new thread 13 | thread = await 
client.threads.create( 14 | # Optional: Add metadata if needed 15 | metadata={ 16 | "user_id": "example_user", 17 | "session": "retrieval_session" 18 | } 19 | ) 20 | 21 | # Prepare the input for the retrieval graph 22 | input_data = { 23 | # You can add additional state keys if your graph expects them 24 | "query": "What is this document about?", 25 | } 26 | 27 | # Invoke the assistant on the created thread 28 | # Replace "retrieval_graph" with your actual assistant ID 29 | async for event in client.runs.stream( 30 | thread_id=thread["thread_id"], 31 | assistant_id="retrieval_graph", 32 | input=input_data, 33 | stream_mode="updates" # Stream updates as they occur 34 | ): 35 | # Process and print each event 36 | print(f"Receiving event of type: {event.event}") 37 | print(event.data) 38 | print("\n") 39 | 40 | except Exception as e: 41 | print(f"An error occurred: {e}") 42 | 43 | # If you're running this in a script, you'll need to use asyncio to run the async function 44 | 45 | asyncio.run(invoke_retrieval_assistant()) 46 | -------------------------------------------------------------------------------- /ch9/py/langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": ["."], 3 | "graphs": { 4 | "indexer": "./src/ingestion_graph/graph.py:graph", 5 | "retrieval_graph": "./src/retrieval_graph/graph.py:graph" 6 | }, 7 | "env": "../../.env" 8 | } 9 | -------------------------------------------------------------------------------- /ch9/py/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "rag-langgraph-template" 3 | version = "0.0.1" 4 | description = "A RAG agentin LangGraph." 5 | authors = [] 6 | license = { text = "MIT" } 7 | readme = "README.md" 8 | requires-python = ">=3.9,<4.0" 9 | dependencies = [ 10 | "langgraph>=0.2.6", 11 | "langchain-openai>=0.1.22", 12 | "langchain>=0.2.14", 13 | "python-dotenv>=1.0.1", 14 | "msgspec>=0.18.6", 15 | "langchain-community>=0.3.15", 16 | "supabase (>=2.13.0,<3.0.0)", 17 | "langchain-chroma>=0.2.0", 18 | "langgraph-sdk>=0.1.51" 19 | ] 20 | 21 | [project.optional-dependencies] 22 | dev = ["mypy>=1.11.1", "ruff>=0.6.1"] 23 | 24 | [build-system] 25 | requires = ["setuptools>=73.0.0", "wheel"] 26 | build-backend = "setuptools.build_meta" 27 | 28 | [tool.setuptools] 29 | packages = ["retrieval_graph", "ingestion_graph", "shared"] 30 | [tool.setuptools.package-dir] 31 | "langgraph.templates.retrieval_graph" = "src/retrieval_graph" 32 | "langgraph.templates.ingestion_graph" = "src/ingestion_graph" 33 | "retrieval_graph" = "src/retrieval_graph" 34 | "ingestion_graph" = "src/ingestion_graph" 35 | "shared" = "src/shared" 36 | 37 | 38 | [tool.setuptools.package-data] 39 | "*" = ["py.typed"] 40 | 41 | [tool.ruff] 42 | lint.select = [ 43 | "E", # pycodestyle 44 | "F", # pyflakes 45 | "I", # isort 46 | "D", # pydocstyle 47 | "D401", # First line should be in imperative mood 48 | "T201", 49 | "UP", 50 | ] 51 | lint.ignore = [ 52 | "UP006", 53 | "UP007", 54 | # We actually do want to import from typing_extensions 55 | "UP035", 56 | # Relax the convention by _not_ requiring documentation for every function parameter. 
57 | "D417", 58 | "E501", 59 | ] 60 | [tool.ruff.lint.per-file-ignores] 61 | "tests/*" = ["D", "UP"] 62 | [tool.ruff.lint.pydocstyle] 63 | convention = "google" 64 | [tool.pytest.ini_options] 65 | pythonpath = [ 66 | "src" 67 | ] 68 | -------------------------------------------------------------------------------- /ch9/py/src/ingestion_graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/learning-langchain/89301c62ac34893b6fbb693b817f0425d496572b/ch9/py/src/ingestion_graph/__init__.py -------------------------------------------------------------------------------- /ch9/py/src/ingestion_graph/configuration.py: -------------------------------------------------------------------------------- 1 | """Define the configurable parameters for the index graph.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field, fields 6 | from typing import Annotated, Literal, Optional, Type, TypeVar, Any 7 | from langchain_core.runnables import RunnableConfig, ensure_config 8 | 9 | 10 | DEFAULT_DOCS_FILE = "src/docSplits.json" 11 | 12 | 13 | @dataclass(kw_only=True) 14 | class IndexConfiguration: 15 | """Configuration class for indexing and retrieval operations. 16 | 17 | This class defines the parameters needed for configuring the indexing and 18 | retrieval processes, including embedding model selection, retriever provider choice, and search parameters. 19 | """ 20 | 21 | docs_file: str = field( 22 | default=DEFAULT_DOCS_FILE, 23 | metadata={ 24 | "description": "Path to a JSON file containing default documents to index." 25 | }, 26 | ) 27 | 28 | embedding_model: Annotated[ 29 | str, 30 | {"__template_metadata__": {"kind": "embeddings"}}, 31 | ] = field( 32 | default="openai/text-embedding-3-small", 33 | metadata={ 34 | "description": "Name of the embedding model to use. Must be a valid embedding model name." 35 | }, 36 | ) 37 | 38 | retriever_provider: Annotated[ 39 | Literal["supabase", "chroma"], 40 | {"__template_metadata__": {"kind": "retriever"}}, 41 | ] = field( 42 | default="chroma", 43 | metadata={ 44 | "description": "The vector store provider to use for retrieval. Options are 'supabase', or 'chroma'." 45 | }, 46 | ) 47 | 48 | search_kwargs: dict[str, Any] = field( 49 | default_factory=dict, 50 | metadata={ 51 | "description": "Additional keyword arguments to pass to the search function of the retriever." 52 | }, 53 | ) 54 | 55 | @classmethod 56 | def from_runnable_config( 57 | cls: Type[T], config: Optional[RunnableConfig] = None 58 | ) -> T: 59 | """Create an IndexConfiguration instance from a RunnableConfig object. 60 | 61 | Args: 62 | cls (Type[T]): The class itself. 63 | config (Optional[RunnableConfig]): The configuration object to use. 64 | 65 | Returns: 66 | T: An instance of IndexConfiguration with the specified configuration. 
67 | """ 68 | config = ensure_config(config) 69 | configurable = config.get("configurable") or {} 70 | _fields = {f.name for f in fields(cls) if f.init} 71 | return cls(**{k: v for k, v in configurable.items() if k in _fields}) 72 | 73 | 74 | T = TypeVar("T", bound=IndexConfiguration) 75 | -------------------------------------------------------------------------------- /ch9/py/src/ingestion_graph/graph.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Optional 3 | from langchain_core.runnables import RunnableConfig 4 | from langgraph.graph import StateGraph, START, END 5 | 6 | from ingestion_graph.configuration import IndexConfiguration 7 | from ingestion_graph.state import IndexState, reduce_docs 8 | 9 | from shared.retrieval import make_retriever 10 | 11 | 12 | async def ingest_docs(state: IndexState, config: Optional[RunnableConfig] = None) -> dict[str, str]: 13 | if not config: 14 | raise ValueError("Configuration required to run index_docs.") 15 | 16 | configuration = IndexConfiguration.from_runnable_config(config) 17 | docs = state["docs"] 18 | if not docs: 19 | with open(configuration.docs_file, encoding="utf-8") as file_content: 20 | serialized_docs = json.loads(file_content.read()) 21 | docs = reduce_docs([], serialized_docs) 22 | else: 23 | docs = reduce_docs([], docs) 24 | 25 | with make_retriever(configuration) as retriever: 26 | await retriever.aadd_documents(docs) 27 | 28 | return {"docs": "delete"} 29 | 30 | # Define the graph 31 | builder = StateGraph(IndexState, config_schema=IndexConfiguration) 32 | builder.add_node(ingest_docs) 33 | builder.add_edge(START, "ingest_docs") 34 | builder.add_edge("ingest_docs", END) 35 | 36 | # Compile into a graph object that you can invoke and deploy. 37 | graph = builder.compile() 38 | graph.name = "IngestionGraph" 39 | -------------------------------------------------------------------------------- /ch9/py/src/ingestion_graph/state.py: -------------------------------------------------------------------------------- 1 | """State management for the index graph.""" 2 | 3 | from dataclasses import dataclass, field 4 | from typing import Annotated 5 | 6 | from langchain_core.documents import Document 7 | 8 | from shared.state import reduce_docs 9 | 10 | 11 | # The index state defines the simple IO for the single-node index graph 12 | @dataclass(kw_only=True) 13 | class IndexState: 14 | """Represents the state for document indexing and retrieval. 15 | 16 | This class defines the structure of the index state, which includes 17 | the documents to be indexed and the retriever used for searching 18 | these documents. 19 | """ 20 | 21 | docs: Annotated[list[Document], reduce_docs] = field( 22 | default_factory=list, 23 | metadata={ 24 | "description": "A list of documents that the agent can index." 
25 | }, 26 | ) 27 | -------------------------------------------------------------------------------- /ch9/py/src/retrieval_graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/learning-langchain/89301c62ac34893b6fbb693b817f0425d496572b/ch9/py/src/retrieval_graph/__init__.py -------------------------------------------------------------------------------- /ch9/py/src/retrieval_graph/configuration.py: -------------------------------------------------------------------------------- 1 | """Define the configurable parameters for the agent.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field, fields 6 | from typing import Annotated, Any, Literal, Optional, Type, TypeVar 7 | 8 | from langchain_core.runnables import RunnableConfig, ensure_config 9 | from shared.configuration import BaseConfiguration 10 | 11 | 12 | @dataclass(kw_only=True) 13 | class Configuration(BaseConfiguration): 14 | """The configuration for the agent.""" 15 | 16 | query_model: Annotated[str, {"__template_metadata__": {"kind": "llm"}}] = field( 17 | default="openai/gpt-4o", 18 | metadata={ 19 | "description": "The language model used for processing and refining queries. Should be in the form: provider/model-name." 20 | }, 21 | ) 22 | -------------------------------------------------------------------------------- /ch9/py/src/retrieval_graph/graph.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | from langchain.hub import pull 3 | from langchain_core.prompts import ChatPromptTemplate 4 | from langchain_core.messages import HumanMessage 5 | from langchain_openai import ChatOpenAI 6 | from langgraph.graph import END, START, StateGraph 7 | from pydantic import BaseModel 8 | 9 | from retrieval_graph.utils import format_docs, load_chat_model 10 | from retrieval_graph.configuration import Configuration 11 | from shared.retrieval import make_retriever 12 | from langchain_core.runnables import RunnableConfig 13 | 14 | from retrieval_graph.state import AgentState 15 | 16 | 17 | class Schema(BaseModel): 18 | route: Literal['retrieve', 'direct'] 19 | direct_answer: str 20 | 21 | 22 | async def check_query_type(state: AgentState, *, config: RunnableConfig): 23 | configuration = Configuration.from_runnable_config(config) 24 | structured_llm = load_chat_model( 25 | configuration.query_model).with_structured_output(Schema) 26 | routing_prompt = ChatPromptTemplate.from_messages([ 27 | ("system", "You are a routing assistant.
Your job is to determine if a question needs document retrieval or can be answered directly.\n\nRespond with either:\n'retrieve' - if the question requires retrieving documents\n'direct' - if the question can be answered directly AND your direct answer"), 28 | ("human", "{query}") 29 | ]) 30 | 31 | formatted_prompt = routing_prompt.invoke({"query": state["query"]}) 32 | response = structured_llm.invoke(formatted_prompt) 33 | 34 | route = response.route 35 | 36 | if route == "retrieve": 37 | return {"route": "retrieve_documents"} 38 | else: 39 | direct_answer = response.direct_answer 40 | return {"route": END, "messages": [HumanMessage(content=direct_answer)]} 41 | 42 | 43 | async def route_query(state: AgentState, *, config: RunnableConfig): 44 | route = state["route"] 45 | if not route: 46 | raise ValueError("Route is not set") 47 | 48 | if route == "retrieve_documents": 49 | return "retrieve_documents" 50 | else: 51 | return END 52 | 53 | 54 | async def retrieve_documents(state: AgentState, *, config: RunnableConfig): 55 | configuration = Configuration.from_runnable_config(config) 56 | retriever = make_retriever(configuration) 57 | response = retriever.invoke(state["query"]) 58 | return {"documents": response} 59 | 60 | 61 | async def generate_response(state: AgentState, *, config: RunnableConfig): 62 | configuration = Configuration.from_runnable_config(config) 63 | context = format_docs(state["documents"]) 64 | prompt_template = pull("rlm/rag-prompt") 65 | formatted_prompt = prompt_template.invoke( 66 | {"context": context, "question": state["query"]}) 67 | messages = formatted_prompt.messages + state["messages"] 68 | response = load_chat_model(configuration.query_model).invoke(messages) 69 | return {"messages": response} 70 | 71 | 72 | builder = StateGraph(AgentState, config_schema=Configuration) 73 | builder.add_node("check_query_type", check_query_type) 74 | builder.add_node("retrieve_documents", retrieve_documents) 75 | builder.add_node("generate_response", generate_response) 76 | builder.add_edge(START, "check_query_type") 77 | builder.add_conditional_edges("check_query_type", route_query) 78 | builder.add_edge("retrieve_documents", "generate_response") 79 | builder.add_edge("generate_response", END) 80 | 81 | # Compile into a graph object that you can invoke and deploy. 82 | graph = builder.compile() 83 | graph.name = "RetrievalGraph" 84 | -------------------------------------------------------------------------------- /ch9/py/src/retrieval_graph/state.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated 2 | from langgraph.graph import MessagesState 3 | from langchain_core.documents import Document 4 | 5 | from shared.state import reduce_docs 6 | 7 | 8 | class AgentState(MessagesState): 9 | query: str 10 | route: str 11 | documents: Annotated[list[Document], reduce_docs] 12 | 13 | -------------------------------------------------------------------------------- /ch9/py/src/retrieval_graph/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from langchain_core.documents import Document 3 | from langchain_core.language_models import BaseChatModel 4 | from langchain.chat_models import init_chat_model 5 | 6 | 7 | def _format_doc(doc: Document) -> str: 8 | """Format a single document as XML. 9 | 10 | Args: 11 | doc (Document): The document to format. 12 | 13 | Returns: 14 | str: The formatted document as an XML string. 
15 | """ 16 | metadata = doc.metadata or {} 17 | meta = "".join(f" {k}={v!r}" for k, v in metadata.items()) 18 | if meta: 19 | meta = f" {meta}" 20 | 21 | return f"\n{doc.page_content}\n" 22 | 23 | 24 | def format_docs(docs: Optional[list[Document]]) -> str: 25 | """Format a list of documents as XML. 26 | 27 | This function takes a list of Document objects and formats them into a single XML string. 28 | 29 | Args: 30 | docs (Optional[list[Document]]): A list of Document objects to format, or None. 31 | 32 | Returns: 33 | str: A string containing the formatted documents in XML format. 34 | 35 | Examples: 36 | >>> docs = [Document(page_content="Hello"), Document(page_content="World")] 37 | >>> print(format_docs(docs)) 38 | 39 | 40 | Hello 41 | 42 | 43 | World 44 | 45 | 46 | 47 | >>> print(format_docs(None)) 48 | 49 | """ 50 | if not docs: 51 | return "" 52 | formatted = "\n".join(_format_doc(doc) for doc in docs) 53 | return f""" 54 | {formatted} 55 | """ 56 | 57 | 58 | def load_chat_model(fully_specified_name: str) -> BaseChatModel: 59 | """Load a chat model from a fully specified name. 60 | 61 | Args: 62 | fully_specified_name (str): String in the format 'provider/model'. 63 | """ 64 | if "/" in fully_specified_name: 65 | provider, model = fully_specified_name.split("/", maxsplit=1) 66 | else: 67 | provider = "" 68 | model = fully_specified_name 69 | return init_chat_model(model, model_provider=provider) 70 | -------------------------------------------------------------------------------- /ch9/py/src/shared/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/learning-langchain/89301c62ac34893b6fbb693b817f0425d496572b/ch9/py/src/shared/__init__.py -------------------------------------------------------------------------------- /ch9/py/src/shared/configuration.py: -------------------------------------------------------------------------------- 1 | """Define the configurable parameters for the agent.""" 2 | 3 | from __future__ import annotations 4 | 5 | from dataclasses import dataclass, field, fields 6 | from typing import Annotated, Any, Literal, Optional, Type, TypeVar 7 | 8 | from langchain_core.runnables import RunnableConfig, ensure_config 9 | 10 | 11 | @dataclass(kw_only=True) 12 | class BaseConfiguration: 13 | """Configuration class for indexing and retrieval operations. 14 | 15 | This class defines the parameters needed for configuring the indexing and 16 | retrieval processes, including user identification, embedding model selection, 17 | retriever provider choice, and search parameters. 18 | """ 19 | embedding_model: Annotated[ 20 | str, 21 | {"__template_metadata__": {"kind": "embeddings"}}, 22 | ] = field( 23 | default="openai/text-embedding-3-small", 24 | metadata={ 25 | "description": "Name of the embedding model to use. Must be a valid embedding model name." 26 | }, 27 | ) 28 | 29 | retriever_provider: Annotated[ 30 | Literal["supabase", "chroma"], 31 | {"__template_metadata__": {"kind": "retriever"}}, 32 | ] = field( 33 | default="chroma", 34 | metadata={ 35 | "description": "The vector store provider to use for retrieval. Options are 'supabase' or 'chroma'." 36 | }, 37 | ) 38 | 39 | search_kwargs: dict[str, Any] = field( 40 | default_factory=dict, 41 | metadata={ 42 | "description": "Additional keyword arguments to pass to the search function of the retriever." 
43 | }, 44 | ) 45 | 46 | @classmethod 47 | def from_runnable_config( 48 | cls: Type[T], config: Optional[RunnableConfig] = None 49 | ) -> T: 50 | """Create an BaseConfiguration instance from a RunnableConfig object. 51 | 52 | Args: 53 | cls (Type[T]): The class itself. 54 | config (Optional[RunnableConfig]): The configuration object to use. 55 | 56 | Returns: 57 | T: An instance of BaseConfiguration with the specified configuration. 58 | """ 59 | config = ensure_config(config) 60 | configurable = config.get("configurable") or {} 61 | _fields = {f.name for f in fields(cls) if f.init} 62 | return cls(**{k: v for k, v in configurable.items() if k in _fields}) 63 | 64 | 65 | T = TypeVar("T", bound=BaseConfiguration) 66 | -------------------------------------------------------------------------------- /ch9/py/src/shared/retrieval.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | import os 3 | from langchain_chroma import Chroma 4 | from langchain_core.embeddings import Embeddings 5 | from langchain_core.runnables import RunnableConfig 6 | from langchain_openai import OpenAIEmbeddings 7 | from langchain_community.vectorstores import SupabaseVectorStore 8 | from langchain_chroma import Chroma 9 | from supabase import create_client 10 | import chromadb 11 | 12 | 13 | from ingestion_graph.configuration import IndexConfiguration 14 | 15 | 16 | def make_text_encoder(model: str) -> Embeddings: 17 | """Connect to the configured text encoder.""" 18 | provider, model = model.split("/", maxsplit=1) 19 | if provider == "openai": 20 | from langchain_openai import OpenAIEmbeddings 21 | return OpenAIEmbeddings(model=model) 22 | else: 23 | raise ValueError(f"Unsupported embedding provider: {provider}") 24 | 25 | 26 | @contextmanager 27 | def make_supabase_retriever(configuration: RunnableConfig, embedding_model: Embeddings): 28 | supabase_url = os.environ.get("SUPABASE_URL") 29 | supabase_key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") 30 | 31 | if not supabase_url or not supabase_key: 32 | raise ValueError( 33 | "Please set SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY env variables") 34 | 35 | client = create_client(supabase_url, supabase_key) 36 | vectorstore = SupabaseVectorStore( 37 | client=client, embedding=embedding_model, table_name="documents", query_name="match_documents") 38 | search_kwargs = configuration.search_kwargs 39 | yield vectorstore.as_retriever(search_kwargs=search_kwargs) 40 | 41 | 42 | @contextmanager 43 | def make_chroma_retriever(configuration: IndexConfiguration, embedding_model: Embeddings): 44 | client = chromadb.HttpClient(host='localhost', port=8000) 45 | 46 | vectorstore = Chroma( 47 | collection_name="documents", 48 | embedding_function=embedding_model, 49 | client=client 50 | ) 51 | search_kwargs = configuration.search_kwargs 52 | search_filter = search_kwargs.setdefault("filter", {}) 53 | yield vectorstore.as_retriever(search_kwargs=search_kwargs) 54 | 55 | 56 | @contextmanager 57 | def make_retriever( 58 | config: RunnableConfig, 59 | ): 60 | """Create a retriever for the agent, based on the current configuration.""" 61 | configuration = IndexConfiguration.from_runnable_config(config) 62 | embedding_model = make_text_encoder(configuration.embedding_model) 63 | if configuration.retriever_provider == "supabase": 64 | with make_supabase_retriever(configuration, embedding_model) as retriever: 65 | yield retriever 66 | elif configuration.retriever_provider == "chroma": 67 | with 
make_chroma_retriever(configuration, embedding_model) as retriever: 68 | yield retriever 69 | else: 70 | raise ValueError( 71 | "Unrecognized retriever_provider in configuration. " 72 | f"Expected one of: {', '.join(IndexConfiguration.__annotations__['retriever_provider'].__args__)}\n" 73 | f"Got: {configuration.retriever_provider}" 74 | ) 75 | -------------------------------------------------------------------------------- /ch9/py/src/shared/state.py: -------------------------------------------------------------------------------- 1 | """Shared functions for state management.""" 2 | 3 | import hashlib 4 | import uuid 5 | from typing import Any, Literal, Optional, Union 6 | 7 | from langchain_core.documents import Document 8 | 9 | from typing import Any, Literal, Optional 10 | 11 | 12 | 13 | def _generate_uuid(page_content: str) -> str: 14 | """Generate a UUID for a document based on page content.""" 15 | md5_hash = hashlib.md5(page_content.encode()).hexdigest() 16 | return str(uuid.UUID(md5_hash)) 17 | 18 | 19 | def reduce_docs( 20 | existing: Optional[list[Document]], 21 | new: Union[ 22 | list[Document], 23 | list[dict[str, Any]], 24 | list[str], 25 | str, 26 | Literal["delete"], 27 | ], 28 | ) -> list[Document]: 29 | """Reduce and process documents based on the input type. 30 | 31 | This function handles various input types and converts them into a sequence of Document objects. 32 | It can delete existing documents, create new ones from strings or dictionaries, or return the existing documents. 33 | It also combines existing documents with the new one based on the document ID. 34 | 35 | Args: 36 | existing (Optional[Sequence[Document]]): The existing docs in the state, if any. 37 | new (Union[Sequence[Document], Sequence[dict[str, Any]], Sequence[str], str, Literal["delete"]]): 38 | The new input to process. Can be a sequence of Documents, dictionaries, strings, a single string, 39 | or the literal "delete".
40 | """ 41 | if new == "delete": 42 | return [] 43 | 44 | existing_list = list(existing) if existing else [] 45 | if isinstance(new, str): 46 | return existing_list + [ 47 | Document(page_content=new, metadata={"uuid": _generate_uuid(new)}) 48 | ] 49 | 50 | new_list = [] 51 | if isinstance(new, list): 52 | existing_ids = set(doc.metadata.get("uuid") for doc in existing_list) 53 | for item in new: 54 | if isinstance(item, str): 55 | item_id = _generate_uuid(item) 56 | new_list.append(Document(page_content=item, metadata={"uuid": item_id})) 57 | existing_ids.add(item_id) 58 | 59 | elif isinstance(item, dict): 60 | metadata = item.get("metadata", {}) 61 | item_id = metadata.get("uuid") or _generate_uuid( 62 | item.get("page_content", "") 63 | ) 64 | 65 | if item_id not in existing_ids: 66 | new_list.append( 67 | Document(**{**item, "metadata": {**metadata, "uuid": item_id}}) 68 | ) 69 | existing_ids.add(item_id) 70 | 71 | elif isinstance(item, Document): 72 | item_id = item.metadata.get("uuid", "") 73 | if not item_id: 74 | item_id = _generate_uuid(item.page_content) 75 | new_item = item.copy(deep=True) 76 | new_item.metadata["uuid"] = item_id 77 | else: 78 | new_item = item 79 | 80 | if item_id not in existing_ids: 81 | new_list.append(new_item) 82 | existing_ids.add(item_id) 83 | 84 | return existing_list + new_list 85 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "learning-langchain-repo", 3 | "description": "Learning LangChain O'Reilly book code examples", 4 | "type": "module", 5 | "author": "Nuno Campos and Mayo Oshin", 6 | "scripts": { 7 | "langgraph:dev": "npx @langchain/langgraph-cli dev -c ch9/js/langgraph.json --verbose" 8 | }, 9 | "dependencies": { 10 | "@langchain/community": "^0.3.26", 11 | "@langchain/core": "^0.3.33", 12 | "@langchain/langgraph": "^0.2.41", 13 | "@langchain/langgraph-cli": "^0.0.1", 14 | "@langchain/langgraph-sdk": "^0.0.36", 15 | "@langchain/openai": "^0.3.17", 16 | "@supabase/supabase-js": "^2.44.0", 17 | "duck-duck-scrape": "^2.2.7", 18 | "expr-eval": "^2.0.2", 19 | "langchain": "^0.3.15", 20 | "pdf-parse": "^1.1.1", 21 | "pg": "^8.13.1", 22 | "sqlite3": "^5.1.7", 23 | "typeorm": "^0.3.20" 24 | }, 25 | "devDependencies": { 26 | "@types/pdf-parse": "^1.1.4" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "learning-langchain" 3 | version = "0.0.1" 4 | description = "Code blocks for the book Learning LangChain." 
5 | authors = [] 6 | license = { text = "MIT" } 7 | readme = "README.md" 8 | requires-python = ">=3.9" 9 | dependencies = [ 10 | "langgraph>=0.2.6", 11 | "langchain-openai>=0.1.22", 12 | "langchain>=0.2.14", 13 | "python-dotenv>=1.0.1", 14 | "langchain-community>=0.3.15", 15 | "langchain-postgres>=0.0.12", 16 | "langchain-chroma>=0.2.0", 17 | "beautifulsoup4>=4.12.2", 18 | "pypdf>=5.1.0", 19 | "psycopg[binary]>=3.2.4", # Updated to include [binary] extra 20 | "setuptools>=75.8.0", 21 | "langsmith>=0.3.2", 22 | "langgraph-checkpoint-sqlite>=2.0.3", 23 | "duckduckgo-search>=7.3.0", 24 | "langgraph-cli>=0.1.73" 25 | ] 26 | 27 | [build-system] 28 | requires = ["setuptools>=73.0.0", "wheel"] 29 | build-backend = "setuptools.build_meta" 30 | 31 | [tool.setuptools] 32 | packages = [] 33 | 34 | [tool.setuptools.package-data] 35 | "*" = ["py.typed"] 36 | 37 | [project.scripts] 38 | langgraph-dev = "langgraph.cli:dev_command --config ch9/py/langgraph.json --verbose" 39 | -------------------------------------------------------------------------------- /test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/learning-langchain/89301c62ac34893b6fbb693b817f0425d496572b/test.pdf --------------------------------------------------------------------------------