├── .github
    └── workflows
    │   └── eval.yml
├── .gitignore
├── Dockerfile
├── README.md
├── app
    ├── __init__.py
    ├── deploy_chain.py
    ├── server.py
    └── test_chains
    │   ├── base_rag_chain.py
    │   ├── context_stuffing_chain.py
    │   ├── langgraph.py
    │   ├── multi_query_chain.py
    │   └── prompts.py
├── eval
    ├── build_eval_set.ipynb
    ├── create_dataset.py
    ├── eval.csv
    ├── eval_lcel_teacher.ipynb
    └── test_chain.py
├── ntbk
    └── lcel-teacher-langgraph.ipynb
├── poetry.lock
└── pyproject.toml


/.github/workflows/eval.yml:
--------------------------------------------------------------------------------
 1 | name: Run Evaluation
 2 | 
 3 | on:
 4 |     workflow_dispatch:
 5 | 
 6 | env:
 7 |     POETRY_VERSION: "1.4.2"
 8 |     VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
 9 |     VOYAGE_AI_MODEL: ${{ secrets.VOYAGE_AI_MODEL }}
10 |     WEAVIATE_API_KEY: ${{ secrets.WEAVIATE_API_KEY }}
11 |     WEAVIATE_URL: ${{ secrets.WEAVIATE_URL }}
12 |     OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
13 |     LANGCHAIN_ENDPOINT: "https://api.smith.langchain.com"
14 |     LANGCHAIN_API_KEY: ${{ secrets.LANGCHAIN_API_KEY }}
15 | 
16 | jobs:
17 |     run_evaluation:
18 |         runs-on: ubuntu-latest
19 | 
20 |         steps:
21 |             - name: Checkout
22 |               uses: actions/checkout@v3
23 | 
24 |             - name: Set up Python
25 |               uses: actions/setup-python@v4
26 |               with:
27 |                 python-version: 3.11
28 |                 cache: "pip"
29 |             - name: Install poetry
30 |               run: pipx install poetry==$POETRY_VERSION
31 |             - name: Install dependencies
32 |               run: poetry install --with dev
33 |             - name: Run evaluation
34 |               run: poetry run python -m eval.test_chain
35 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .DS_Store
3 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.11-slim
 2 | 
 3 | RUN pip install poetry==1.6.1
 4 | 
 5 | RUN poetry config virtualenvs.create false
 6 | 
 7 | WORKDIR /code
 8 | 
 9 | COPY ./pyproject.toml ./README.md ./poetry.lock* ./
10 | 
11 | RUN poetry install  --no-interaction --no-ansi --no-root
12 | 
13 | COPY ./app ./app
14 | 
15 | RUN poetry install --no-interaction --no-ansi
16 | 
17 | EXPOSE 8080
18 | 
19 | CMD exec uvicorn app.server:app --host 0.0.0.0 --port 8080
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # LCEL-Teacher
  2 | 
  3 | ## Introduction
  4 | 
  5 | [LangChain Expression Language](https://python.langchain.com/docs/expression_language/) has a number of important benefits, including transparent composability of LLM components, seamless support for prototyping and production (with [LangServe](https://python.langchain.com/docs/langserve)) using the same code, and a common [interface](https://python.langchain.com/docs/expression_language/interface) for every chain. But, there a learning curve for using LCEL. Here, we aim to build a coding assistant for LCEL. 
  6 | 
  7 | ## Architecture
  8 | 
  9 | We explore three several architectures for LCEL-teacher in this repo, including:
 10 | 
 11 | * `Context stuffing` of LCEL docs into the LLM context window
 12 | * `RAG` using retrieval from a vector databases of all LangChain documentation  
 13 | * `RAG using multi-question and answer generation` using retrieval from a vector databases of all LangChain documentation  
 14 | * `Context stuffing with recovery` using LangGraph for code execution and re-try
 15 |   
 16 | ![Screenshot 2024-02-16 at 2 30 50 PM](https://github.com/langchain-ai/lcel-teacher/assets/122662504/ed53836b-69c7-4507-8683-5f728f9281bc)
 17 | 
 18 | Code for each can be found in the `/app` directory.
 19 | 
 20 | ## Environment 
 21 | 
 22 | We use Poetry for dependency management. 
 23 |  
 24 | `Context stuffing` requires no vectorstore access because we will directly read the docs and stuff them into the LLM context windopw.
 25 |  
 26 | Both `RAG` approaches rely on an vectorstore index of LangChain documentation (Weaviate) with fine-tuned embeddings from Voyage:
 27 | 
 28 | * `WEAVIATE_URL`
 29 | * `WEAVIATE_API_KEY`
 30 | * `VOYAGE_API_KEY`
 31 | * `VOYAGE_AI_MODEL`
 32 | 
 33 | ## Using the app
 34 | 
 35 | This repo is a LangServe app. We host it using hosted LangServe. To learn more [see this video](https://www.youtube.com/watch?v=EhlPDL4QrWY).
 36 | 
 37 | You can access it [here](https://lcel-teacher-07fb6cd4e0815e64acb318d410f74a37-ffoprvkqsa-uc.a.run.app/lcel-teacher/playground/).
 38 | 
 39 | <img width="812" alt="Screenshot 2024-01-29 at 10 06 44 AM" src="https://github.com/langchain-ai/lcel-teacher/assets/122662504/eb49c291-a00b-460f-a0fe-7b6a4cba21e5">
 40 | 
 41 | The deployment in the LangChain org within LangSmith is [here](https://smith.langchain.com/o/ebbaf2eb-769b-4505-aca2-d11de10372a4/host/2ef29f66-e508-4a7a-8a22-8d5ef997f985).
 42 | 
 43 | The steps to deploy it for yourself are shown below.
 44 | 
 45 | --- 
 46 | 
 47 | ## Running locally and deployment
 48 | 
 49 | This repo was created following these steps:
 50 | 
 51 | **(1) Create a LangChain app.**
 52 | 
 53 | Run:
 54 | ```
 55 | langchain app new .  
 56 | ```
 57 | 
 58 | This creates two folders:
 59 | ```
 60 | app: This is where LangServe code will live
 61 | packages: This is where your chains or agents will live
 62 | ```
 63 | 
 64 | It also creates:
 65 | ```
 66 | Dockerfile: App configurations
 67 | pyproject.toml: Project configurations
 68 | ```
 69 | 
 70 | Add app dependencies to `pyproject.toml` and `poetry.lock`:
 71 | ```
 72 | poetry add weaviate-client
 73 | poetry add langchainhub
 74 | poetry add openai
 75 | poetry add pandas
 76 | poetry add jupyter
 77 | poetry add tiktoken
 78 | poetry add scikit-learn
 79 | poetry add langchain_openai
 80 | ```
 81 | 
 82 | Update enviorment based on the updated lock file:
 83 | ```
 84 | poetry install
 85 | ```
 86 | 
 87 | **(2) Add the chains**
 88 | 
 89 | Add our custom retrieval code to the `app` directory.
 90 | 
 91 | In our case, I add the various `_chain.py` files.
 92 | 
 93 | Each file simply has a LCEL chain defined. For example:
 94 | 
 95 | ```
 96 | chain = (
 97 |     {
 98 |         "context": lambda x: concatenated_content,
 99 |         "question": RunnablePassthrough(),
100 |     }
101 |     | prompt
102 |     | model
103 |     | StrOutputParser()
104 | )
105 | 
106 | # Add typing for input
107 | class Question(BaseModel):
108 |     __root__: str
109 | 
110 | 
111 | chain = chain.with_types(input_type=Question)
112 | ```
113 | 
114 | Now, we simply import the chain in `server.py`:
115 | ```
116 | from app.deploy_chain import chain as chain_to_deploy
117 | add_routes(app, chain_to_deploy, path="/lcel-teacher")
118 | ```
119 | 
120 | Run locally
121 | ```
122 | poetry run langchain serve
123 | ```
124 | 
125 | Simply, the invocation methods of our LCEl chain are mapped to HTTP endpoints in the LangServe app:
126 | ![Screenshot 2024-01-26 at 11 48 06 AM](https://github.com/langchain-ai/lcel-teacher/assets/122662504/46c4f65b-1719-4212-b450-142062fd0d5b)
127 | 
128 | For hosted LangServe, sign up in your LangSmith console on the `Deployments` tab and connect to your fork of this repo.
129 | 
130 | ## Eval
131 | 
132 | In `eval/` you will see `eval.csv`.
133 | 
134 | Use this to create a LangSmith as a [dataset](https://smith.langchain.com/public/3b0fe661-e3ed-4d84-9d88-96c7ee8c4a2d/d), `lcel-teacher-eval`.
135 | 
136 | Run notebook to kick off eval:
137 | ```
138 | poetry run jupyter notebook
139 | ```
140 | 
141 | Use `eval/eval_lcel_teacher.ipynb` to run evals.
142 | 


--------------------------------------------------------------------------------
/app/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/langchain-ai/lcel-teacher/4dd239314ea7c864d4da13f172b046c5433056f2/app/__init__.py


--------------------------------------------------------------------------------
/app/deploy_chain.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup as Soup
 2 | from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
 3 | from langchain_openai import ChatOpenAI
 4 | from langchain.prompts import ChatPromptTemplate
 5 | from langchain_core.output_parsers import StrOutputParser
 6 | from langchain_core.runnables import RunnablePassthrough
 7 | from langchain_core.pydantic_v1 import BaseModel
 8 | 
 9 | # Load LCEL docs
10 | url = "https://python.langchain.com/docs/expression_language/"
11 | loader = RecursiveUrlLoader(
12 |     url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
13 | )
14 | docs = loader.load()
15 | 
16 | # LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
17 | url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
18 | loader = RecursiveUrlLoader(
19 |     url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
20 | )
21 | docs_pydantic = loader.load()
22 | 
23 | # LCEL w/ Self Query (outside the primary LCEL docs)
24 | url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
25 | loader = RecursiveUrlLoader(
26 |     url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
27 | )
28 | docs_sq = loader.load()
29 | 
30 | # Add 
31 | docs.extend([*docs_pydantic, *docs_sq])
32 | 
33 | # Sort the list based on the URLs in 'metadata' -> 'source'
34 | d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
35 | d_reversed = list(reversed(d_sorted))
36 | 
37 | # Concatenate the 'page_content' of each sorted dictionary
38 | concatenated_content = "\n\n\n --- \n\n\n".join(
39 |     [doc.page_content for doc in d_reversed]
40 | )
41 | 
42 | # Prompt template
43 | template = """You are a coding assistant with expertise in LCEL, LangChain expression language. Here is a full set of documentation:
44 | {context}
45 | 
46 | Now, answer the user question based on the above provided documentation and ensure any code you provide can be executed with all required imports and variables defined: {question}
47 | """
48 | prompt = ChatPromptTemplate.from_template(template)
49 | 
50 | model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview") 
51 | 
52 | chain = (
53 |     {
54 |         "context": lambda x: concatenated_content,
55 |         "question": RunnablePassthrough(),
56 |     }
57 |     | prompt
58 |     | model
59 |     | StrOutputParser()
60 | )
61 | 
62 | # Add typing for input
63 | class Question(BaseModel):
64 |     __root__: str
65 | 
66 | 
67 | chain = chain.with_types(input_type=Question)


--------------------------------------------------------------------------------
/app/server.py:
--------------------------------------------------------------------------------
 1 | from fastapi import FastAPI
 2 | from fastapi.responses import RedirectResponse
 3 | from langserve import add_routes
 4 | from app.deploy_chain import chain as chain_to_deploy
 5 | 
 6 | app = FastAPI()
 7 | 
 8 | 
 9 | @app.get("/")
10 | async def redirect_root_to_docs():
11 |     return RedirectResponse("/docs")
12 | 
13 | 
14 | add_routes(app, chain_to_deploy, path="/lcel-teacher")
15 | 
16 | if __name__ == "__main__":
17 |     import uvicorn
18 | 
19 |     uvicorn.run(app, host="0.0.0.0", port=8000)
20 | 


--------------------------------------------------------------------------------
/app/test_chains/base_rag_chain.py:
--------------------------------------------------------------------------------
 1 | import weaviate
 2 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 3 | from langchain.vectorstores import Weaviate
 4 | from langchain.prompts import PromptTemplate
 5 | from langchain.embeddings.voyageai import VoyageEmbeddings
 6 | from langchain_core.runnables import RunnablePassthrough
 7 | from langchain_core.pydantic_v1 import BaseModel, Field
 8 | from langchain.output_parsers.openai_tools import PydanticToolsParser
 9 | from langchain_core.utils.function_calling import convert_to_openai_tool
10 | 
11 | # Keys
12 | import os
13 | 
14 | WEAVIATE_URL = os.environ["WEAVIATE_URL"]
15 | WEAVIATE_API_KEY = os.environ["WEAVIATE_API_KEY"]
16 | WEAVIATE_DOCS_INDEX_NAME = "LangChain_agent_docs"
17 | 
18 | # Fine-tuned embd and vectorstore
19 | def get_embeddings_model():
20 |     if os.environ.get("VOYAGE_API_KEY") and os.environ.get("VOYAGE_AI_MODEL"):
21 |         return VoyageEmbeddings(model=os.environ["VOYAGE_AI_MODEL"])
22 |     return OpenAIEmbeddings(chunk_size=200)
23 | 
24 | 
25 | def get_retriever():
26 |     weaviate_client = weaviate.Client(
27 |         url=WEAVIATE_URL,
28 |         auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
29 |     )
30 |     weaviate_client = Weaviate(
31 |         client=weaviate_client,
32 |         index_name=WEAVIATE_DOCS_INDEX_NAME,
33 |         text_key="text",
34 |         embedding=get_embeddings_model(),
35 |         by_text=False,
36 |         attributes=["source", "title"],
37 |     )
38 |     return weaviate_client.as_retriever(search_kwargs=dict(k=6))
39 | 
40 | # Retriever
41 | retriever = get_retriever()
42 | 
43 | ## Data model
44 | class code(BaseModel):
45 |     """Code output"""
46 |     prefix: str = Field(description="Description of the problem and approach")
47 |     imports: str = Field(description="Code block import statements")
48 |     code: str = Field(description="Code block not including import statements")
49 | 
50 | ## LLM
51 | model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
52 | 
53 | # Tool
54 | code_tool_oai = convert_to_openai_tool(code)
55 | 
56 | # LLM with tool and enforce invocation
57 | llm_with_tool = model.bind(
58 |     tools=[convert_to_openai_tool(code_tool_oai)],
59 |     tool_choice={"type": "function", "function": {"name": "code"}},
60 | )
61 | 
62 | # Parser
63 | parser_tool = PydanticToolsParser(tools=[code])
64 | 
65 | # Create a prompt template with format instructions and the query
66 | prompt = PromptTemplate(
67 |     template = """You are a coding assistant with expertise in LangChain. \n 
68 |     Here is relevant context: 
69 |     \n ------- \n
70 |     {context} 
71 |     \n ------- \n
72 |     Ensure any code you provide can be executed with all required imports and variables defined. \n
73 |     Structure your answer with a description of the code solution. \n
74 |     Then list the imports. And finally list the functioning code block. \n
75 |     Here is the user question: \n --- --- --- \n {question}""",
76 |     input_variables=["question","context"])
77 | 
78 | # Chain
79 | chain = (
80 |     {"context": retriever, "question": RunnablePassthrough()}
81 |     | prompt
82 |     | llm_with_tool
83 |     | parser_tool
84 | )
85 | 
86 | # Add typing for input
87 | class Question(BaseModel):
88 |     __root__: str
89 | 
90 | chain = chain.with_types(input_type=Question)


--------------------------------------------------------------------------------
/app/test_chains/context_stuffing_chain.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup as Soup
 2 | from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
 3 | from langchain_openai import ChatOpenAI
 4 | from langchain.prompts import PromptTemplate
 5 | from langchain_core.runnables import RunnablePassthrough
 6 | from langchain_core.pydantic_v1 import BaseModel, Field
 7 | from langchain.output_parsers.openai_tools import PydanticToolsParser
 8 | from langchain_core.utils.function_calling import convert_to_openai_tool
 9 | 
10 | # Load LCEL docs
11 | url = "https://python.langchain.com/docs/expression_language/"
12 | loader = RecursiveUrlLoader(
13 |     url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
14 | )
15 | docs = loader.load()
16 | 
17 | # LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
18 | url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
19 | loader = RecursiveUrlLoader(
20 |     url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
21 | )
22 | docs_pydantic = loader.load()
23 | 
24 | # LCEL w/ Self Query (outside the primary LCEL docs)
25 | url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
26 | loader = RecursiveUrlLoader(
27 |     url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
28 | )
29 | docs_sq = loader.load()
30 | 
31 | # Add 
32 | docs.extend([*docs_pydantic, *docs_sq])
33 | 
34 | # Sort the list based on the URLs in 'metadata' -> 'source'
35 | d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
36 | d_reversed = list(reversed(d_sorted))
37 | 
38 | # Concatenate the 'page_content' of each sorted dictionary
39 | concatenated_content = "\n\n\n --- \n\n\n".join(
40 |     [doc.page_content for doc in d_reversed]
41 | )
42 | 
43 | ## Data model
44 | class code(BaseModel):
45 |     """Code output"""
46 |     prefix: str = Field(description="Description of the problem and approach")
47 |     imports: str = Field(description="Code block import statements")
48 |     code: str = Field(description="Code block not including import statements")
49 | 
50 | ## LLM
51 | model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
52 | 
53 | # Tool
54 | code_tool_oai = convert_to_openai_tool(code)
55 | 
56 | # LLM with tool and enforce invocation
57 | llm_with_tool = model.bind(
58 |     tools=[convert_to_openai_tool(code_tool_oai)],
59 |     tool_choice={"type": "function", "function": {"name": "code"}},
60 | )
61 | 
62 | # Parser
63 | parser_tool = PydanticToolsParser(tools=[code])
64 | 
65 | # Create a prompt template with format instructions and the query
66 | prompt = PromptTemplate(
67 |     template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n 
68 |         Here is a full set of LCEL documentation: 
69 |         \n ------- \n
70 |         {context} 
71 |         \n ------- \n
72 |         Answer the user question based on the above provided documentation. \n
73 |         Ensure any code you provide can be executed with all required imports and variables defined. \n
74 |         Structure your answer with a description of the code solution. \n
75 |         Then list the imports. And finally list the functioning code block. \n
76 |         Here is the user question: \n --- --- --- \n {question}""",
77 |     input_variables=["question","context"])
78 | 
79 | chain = (
80 |     {
81 |         "context": lambda x: concatenated_content,
82 |         "question": RunnablePassthrough(),
83 |     }
84 |     | prompt
85 |     | llm_with_tool
86 |     | parser_tool
87 | )
88 | 
89 | # Add typing for input
90 | class Question(BaseModel):
91 |     __root__: str
92 | 
93 | 
94 | chain = chain.with_types(input_type=Question)


--------------------------------------------------------------------------------
/app/test_chains/langgraph.py:
--------------------------------------------------------------------------------
  1 | from operator import itemgetter
  2 | from bs4 import BeautifulSoup as Soup
  3 | from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
  4 | from langchain_openai import ChatOpenAI
  5 | from langchain.prompts import PromptTemplate
  6 | from langchain_core.output_parsers import StrOutputParser
  7 | from langchain_core.runnables import RunnablePassthrough
  8 | from langchain_core.pydantic_v1 import BaseModel, Field
  9 | from langchain.output_parsers import PydanticOutputParser
 10 | from langchain.output_parsers.openai_tools import PydanticToolsParser
 11 | from langchain_core.utils.function_calling import convert_to_openai_tool
 12 | from langgraph.graph import END, StateGraph
 13 | 
 14 | from typing import Dict, TypedDict
 15 | 
 16 | from langchain_core.messages import BaseMessage
 17 | 
 18 | 
 19 | class GraphState(TypedDict):
 20 |     """
 21 |     Represents the state of our graph.
 22 | 
 23 |     Attributes:
 24 |         keys: A dictionary where each key is a string.
 25 |     """
 26 | 
 27 |     keys: Dict[str, any]
 28 | 
 29 | 
 30 | def generate(state):
 31 |     """
 32 |     Generate a code solution based on LCEL docs and the input question 
 33 |     with optional feedback from code execution tests 
 34 | 
 35 |     Args:
 36 |         state (dict): The current graph state
 37 | 
 38 |     Returns:
 39 |         state (dict): New key added to state, documents, that contains retrieved documents
 40 |     """
 41 |     
 42 |     ## State
 43 |     state_dict = state["keys"]
 44 |     question = state_dict["question"]
 45 |     
 46 |     ## Context 
 47 |     # LCEL docs
 48 |     url = "https://python.langchain.com/docs/expression_language/"
 49 |     loader = RecursiveUrlLoader(
 50 |         url=url, max_depth=20, extractor=lambda x: Soup(x, "html.parser").text
 51 |     )
 52 |     docs = loader.load()
 53 |     
 54 |     # LCEL w/ PydanticOutputParser (outside the primary LCEL docs)
 55 |     url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
 56 |     loader = RecursiveUrlLoader(
 57 |         url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
 58 |     )
 59 |     docs_pydantic = loader.load()
 60 |     
 61 |     # LCEL w/ Self Query (outside the primary LCEL docs)
 62 |     url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
 63 |     loader = RecursiveUrlLoader(
 64 |         url=url, max_depth=1, extractor=lambda x: Soup(x, "html.parser").text
 65 |     )
 66 |     docs_sq = loader.load()
 67 |     
 68 |     # Add 
 69 |     docs.extend([*docs_pydantic, *docs_sq])
 70 |     
 71 |     # Sort the list based on the URLs in 'metadata' -> 'source'
 72 |     d_sorted = sorted(docs, key=lambda x: x.metadata["source"])
 73 |     d_reversed = list(reversed(d_sorted))
 74 |     
 75 |     # Concatenate the 'page_content' of each sorted dictionary
 76 |     concatenated_content = "\n\n\n --- \n\n\n".join(
 77 |         [doc.page_content for doc in d_reversed]
 78 |     )
 79 |     
 80 |     ## Data model
 81 |     class code(BaseModel):
 82 |         """Code output"""
 83 |         prefix: str = Field(description="Description of the problem and approach")
 84 |         imports: str = Field(description="Code block import statements")
 85 |         code: str = Field(description="Code block not including import statements")
 86 |     
 87 |     ## LLM
 88 |     model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
 89 |     
 90 |     # Tool
 91 |     code_tool_oai = convert_to_openai_tool(code)
 92 |     
 93 |     # LLM with tool and enforce invocation
 94 |     llm_with_tool = model.bind(
 95 |         tools=[convert_to_openai_tool(code_tool_oai)],
 96 |         tool_choice={"type": "function", "function": {"name": "code"}},
 97 |     )
 98 |     
 99 |     # Parser
100 |     parser_tool = PydanticToolsParser(tools=[code])
101 |     
102 |     ## Prompt
103 |     template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n 
104 |         Here is a full set of LCEL documentation: 
105 |         \n ------- \n
106 |         {context} 
107 |         \n ------- \n
108 |         Answer the user question based on the above provided documentation. \n
109 |         Ensure any code you provide can be executed with all required imports and variables defined. \n
110 |         Structure your answer with a description of the code solution. \n
111 |         Then list the imports. And finally list the functioning code block. \n
112 |         Here is the user question: \n --- --- --- \n {question}"""
113 | 
114 |     ## Generation
115 |     if "error" in state_dict:
116 |         print("---RE-GENERATE SOLUTION w/ ERROR FEEDBACK---")
117 |         
118 |         error = state_dict["error"]
119 |         code_solution = state_dict["generation"]
120 |         
121 |         # Udpate prompt 
122 |         addendum = """  \n --- --- --- \n You previously tried to solve this problem. \n Here is your solution:  
123 |                     \n --- --- --- \n {generation}  \n --- --- --- \n  Here is the resulting error from code 
124 |                     execution:  \n --- --- --- \n {error}  \n --- --- --- \n Please re-try to answer this. 
125 |                     Structure your answer with a description of the code solution. \n Then list the imports. 
126 |                     And finally list the functioning code block. Structure your answer with a description of 
127 |                     the code solution. \n Then list the imports. And finally list the functioning code block. 
128 |                     \n Here is the user question: \n --- --- --- \n {question}"""
129 |         template = template +  addendum
130 | 
131 |         # Prompt 
132 |         prompt = PromptTemplate(
133 |             template=template,
134 |             input_variables=["context", "question", "generation", "error"],
135 |         )
136 |         
137 |         # Chain
138 |         chain = (
139 |             {
140 |                 "context": lambda x: concatenated_content,
141 |                 "question": itemgetter("question"),
142 |                 "generation": itemgetter("generation"),
143 |                 "error": itemgetter("error"),
144 |             }
145 |             | prompt
146 |             | llm_with_tool 
147 |             | parser_tool
148 |         )
149 | 
150 |         code_solution = chain.invoke({"question":question,
151 |                                       "generation":str(code_solution[0]),
152 |                                       "error":error})
153 |                 
154 |     else:
155 |         print("---GENERATE SOLUTION---")
156 |         
157 |         # Prompt 
158 |         prompt = PromptTemplate(
159 |             template=template,
160 |             input_variables=["context", "question"],
161 |         )
162 | 
163 |         # Chain
164 |         chain = (
165 |             {
166 |                 "context": lambda x: concatenated_content,
167 |                 "question": itemgetter("question"),
168 |             }
169 |             | prompt
170 |             | llm_with_tool 
171 |             | parser_tool
172 |         )
173 | 
174 |         code_solution = chain.invoke({"question":question})
175 |     
176 |     return {"keys": {"generation": code_solution, "question": question}}
177 | 
178 | def check_code_imports(state):
179 |     """
180 |     Check imports
181 | 
182 |     Args:
183 |         state (dict): The current graph state
184 | 
185 |     Returns:
186 |         state (dict): New key added to state, error
187 |     """
188 |     
189 |     ## State
190 |     print("---CHECKING CODE IMPORTS---")
191 |     state_dict = state["keys"]
192 |     question = state_dict["question"]
193 |     code_solution = state_dict["generation"]
194 |     imports = code_solution[0].imports
195 | 
196 |     try:        
197 |         # Attempt to execute the imports
198 |         exec(imports)
199 |     except Exception as e:
200 |         print("---CODE IMPORT CHECK: FAILED---")
201 |         # Catch any error during execution (e.g., ImportError, SyntaxError)
202 |         error = f"Execution error: {e}"
203 |         if "error" in state_dict:
204 |             error_prev_runs = state_dict["error"]
205 |             error = error_prev_runs + "\n --- Most recent run error --- \n" + error     
206 |     else:
207 |         print("---CODE IMPORT CHECK: SUCCESS---")
208 |         # No errors occurred
209 |         error = "None"
210 | 
211 |     return {"keys": {"generation": code_solution, "question": question, "error": error}}
212 | 
213 | def check_code_execution(state):
214 |     """
215 |     Check code block execution
216 | 
217 |     Args:
218 |         state (dict): The current graph state
219 | 
220 |     Returns:
221 |         state (dict): New key added to state, error
222 |     """
223 |     
224 |     ## State
225 |     print("---CHECKING CODE EXECUTION---")
226 |     state_dict = state["keys"]
227 |     question = state_dict["question"]
228 |     code_solution = state_dict["generation"]
229 |     imports = code_solution[0].imports
230 |     code = code_solution[0].code
231 |     code_block = imports +"\n"+ code
232 | 
233 |     try:        
234 |         # Attempt to execute the code block
235 |         exec(code_block)
236 |     except Exception as e:
237 |         print("---CODE BLOCK CHECK: FAILED---")
238 |         # Catch any error during execution (e.g., ImportError, SyntaxError)
239 |         error = f"Execution error: {e}"
240 |         if "error" in state_dict:
241 |             error_prev_runs = state_dict["error"]
242 |             error = error_prev_runs + "\n --- Most recent run error --- \n" + error  
243 |     else:
244 |         print("---CODE BLOCK CHECK: SUCCESS---")
245 |         # No errors occurred
246 |         error = "None"
247 | 
248 |     return {"keys": {"generation": code_solution, "question": question, "error": error}}
249 | 
250 | 
251 | ### Edges
252 | 
253 | def decide_to_check_code_exec(state):
254 |     """
255 |     Determines whether to test code execution, or re-try answer generation.
256 | 
257 |     Args:
258 |         state (dict): The current state of the agent, including all keys.
259 | 
260 |     Returns:
261 |         str: Next node to call
262 |     """
263 | 
264 |     print("---DECIDE TO TEST CODE EXECUTION---")
265 |     state_dict = state["keys"]
266 |     question = state_dict["question"]
267 |     code_solution = state_dict["generation"]
268 |     error = state_dict["error"]
269 | 
270 |     if error == "None":
271 |         # All documents have been filtered check_relevance
272 |         # We will re-generate a new query
273 |         print("---DECISION: TEST CODE EXECUTION---")
274 |         return "check_code_execution"
275 |     else:
276 |         # We have relevant documents, so generate answer
277 |         print("---DECISION: RE-TRY SOLUTION---")
278 |         return "generate"
279 | 
280 | def decide_to_finish(state):
281 |     """
282 |     Determines whether to finish.
283 | 
284 |     Args:
285 |         state (dict): The current state of the agent, including all keys.
286 | 
287 |     Returns:
288 |         str: Next node to call
289 |     """
290 | 
291 |     print("---DECIDE TO TEST CODE EXECUTION---")
292 |     state_dict = state["keys"]
293 |     question = state_dict["question"]
294 |     code_solution = state_dict["generation"]
295 |     error = state_dict["error"]
296 | 
297 |     if error == "None":
298 |         # All documents have been filtered check_relevance
299 |         # We will re-generate a new query
300 |         print("---DECISION: TEST CODE EXECUTION---")
301 |         return "end"
302 |     else:
303 |         # We have relevant documents, so generate answer
304 |         print("---DECISION: RE-TRY SOLUTION---")
305 |         return "generate"
306 |     
307 | # Flow
308 | workflow = StateGraph(GraphState)
309 | 
310 | # Define the nodes
311 | workflow.add_node("generate", generate)  # generation solution
312 | workflow.add_node("check_code_imports", check_code_imports)  # check imports
313 | workflow.add_node("check_code_execution", check_code_execution)  # check execution
314 | 
315 | # Build graph
316 | workflow.set_entry_point("generate")
317 | workflow.add_edge("generate", "check_code_imports")
318 | workflow.add_conditional_edges(
319 |     "check_code_imports",
320 |     decide_to_check_code_exec,
321 |     {
322 |         "check_code_execution": "check_code_execution",
323 |         "generate": "generate",
324 |     },
325 | )
326 | workflow.add_conditional_edges(
327 |     "check_code_execution",
328 |     decide_to_finish,
329 |     {
330 |         "end": END,
331 |         "generate": "generate",
332 |     },
333 | )
334 | 
335 | # Compile
336 | app = workflow.compile()


--------------------------------------------------------------------------------
/app/test_chains/multi_query_chain.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from operator import itemgetter
  3 | from typing import Dict, List, Optional, Sequence
  4 | 
  5 | import weaviate
  6 | from langchain import hub
  7 | from langchain_openai import ChatOpenAI
  8 | from langchain.embeddings.openai import OpenAIEmbeddings
  9 | from langchain.embeddings.voyageai import VoyageEmbeddings
 10 | from langchain.output_parsers.json import SimpleJsonOutputParser
 11 | from langchain.prompts import ChatPromptTemplate, PromptTemplate
 12 | from langchain.schema import Document
 13 | from langchain.schema.embeddings import Embeddings
 14 | from langchain.schema.language_model import BaseLanguageModel
 15 | from langchain.schema.messages import AIMessage, HumanMessage
 16 | from langchain.schema.output_parser import StrOutputParser
 17 | from langchain.schema.retriever import BaseRetriever
 18 | from langchain.schema.runnable import (
 19 |     Runnable,
 20 |     RunnableBranch,
 21 |     RunnableLambda,
 22 |     RunnableMap,
 23 | )
 24 | from langchain.vectorstores import Weaviate
 25 | from langchain_core.runnables import RunnablePassthrough
 26 | from langchain_core.pydantic_v1 import BaseModel, Field
 27 | from langchain.output_parsers.openai_tools import PydanticToolsParser
 28 | from langchain_core.utils.function_calling import convert_to_openai_tool
 29 | 
 30 | # Prompts
 31 | from .prompts import REPHRASE_TEMPLATE, RESPONSE_TEMPLATE
 32 | 
 33 | # Keys
 34 | WEAVIATE_URL = os.environ["WEAVIATE_URL"]
 35 | WEAVIATE_API_KEY = os.environ["WEAVIATE_API_KEY"]
 36 | WEAVIATE_DOCS_INDEX_NAME = "LangChain_agent_docs"
 37 | 
 38 | 
 39 | # Define the data structure for chat requests
 40 | class ChatRequest(BaseModel):
 41 |     question: str  # The question asked in the chat
 42 |     chat_history: Optional[List[Dict[str, str]]]  # Optional chat history
 43 | 
 44 | 
 45 | # Function to get the embeddings model based on environment variables
 46 | def get_embeddings_model() -> Embeddings:
 47 |     # Check for specific environment variables to determine the embeddings model
 48 |     if os.environ.get("VOYAGE_API_KEY") and os.environ.get("VOYAGE_AI_MODEL"):
 49 |         return VoyageEmbeddings(model=os.environ["VOYAGE_AI_MODEL"])
 50 |     # Default to OpenAI embeddings if the specific environment variables are not set
 51 |     return OpenAIEmbeddings(chunk_size=200)
 52 | 
 53 | 
 54 | # Function to initialize and return the retriever
 55 | def get_retriever() -> BaseRetriever:
 56 |     # Initialize Weaviate client with authentication and connection details
 57 |     weaviate_client = weaviate.Client(
 58 |         url=WEAVIATE_URL,
 59 |         auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
 60 |     )
 61 |     # Configure the Weaviate client with specific settings
 62 |     weaviate_client = Weaviate(
 63 |         client=weaviate_client,
 64 |         index_name=WEAVIATE_DOCS_INDEX_NAME,
 65 |         text_key="text",
 66 |         embedding=get_embeddings_model(),
 67 |         by_text=False,
 68 |         attributes=["source", "title"],
 69 |     )
 70 |     # Return the configured retriever
 71 |     return weaviate_client.as_retriever(search_kwargs=dict(k=6))
 72 | 
 73 | 
 74 | # Function to create a chain of retrievers
 75 | def create_retriever_chain(
 76 |     llm: BaseLanguageModel, retriever: BaseRetriever
 77 | ) -> Runnable:
 78 |     # Template to condense the question
 79 |     CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(REPHRASE_TEMPLATE)
 80 |     # Create a chain to process the question and retrieve relevant information
 81 |     condense_question_chain = (
 82 |         CONDENSE_QUESTION_PROMPT | llm | StrOutputParser()
 83 |     ).with_config(
 84 |         run_name="CondenseQuestion",
 85 |     )
 86 |     conversation_chain = condense_question_chain | retriever
 87 |     # Return a branch of runnables depending on whether there's chat history
 88 |     return RunnableBranch(
 89 |         (
 90 |             RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
 91 |                 run_name="HasChatHistoryCheck"
 92 |             ),
 93 |             conversation_chain.with_config(run_name="RetrievalChainWithHistory"),
 94 |         ),
 95 |         (
 96 |             RunnableLambda(itemgetter("question")).with_config(
 97 |                 run_name="Itemgetter:question"
 98 |             )
 99 |             | retriever
100 |         ).with_config(run_name="RetrievalChainWithNoHistory"),
101 |     ).with_config(run_name="RouteDependingOnChatHistory")
102 | 
103 | 
104 | # Function to format the retrieved documents
105 | def format_docs(docs: Sequence[Document]) -> str:
106 |     formatted_docs = []
107 |     # Iterate through each document and format it
108 |     for i, doc in enumerate(docs):
109 |         doc_string = f"<doc id='{i}'>{doc.page_content}</doc>"
110 |         formatted_docs.append(doc_string)
111 |     return "\n".join(formatted_docs)
112 | 
113 | 
114 | # Function to serialize the chat history from a chat request
115 | def serialize_history(request: ChatRequest):
116 |     chat_history = request["chat_history"] or []
117 |     converted_chat_history = []
118 |     # Convert each message in the chat history to the appropriate message type
119 |     for message in chat_history:
120 |         if message.get("human") is not None:
121 |             converted_chat_history.append(HumanMessage(content=message["human"]))
122 |         if message.get("ai") is not None:
123 |             converted_chat_history.append(AIMessage(content=message["ai"]))
124 |     return converted_chat_history
125 | 
126 | 
127 | # Function to create the answer chain
128 | def create_question_anwser_chain(
129 |     llm: BaseLanguageModel,
130 |     retriever: BaseRetriever,
131 | ) -> Runnable:
132 |     # Create a retriever chain and configure it
133 |     retriever_chain = create_retriever_chain(
134 |         llm,
135 |         retriever,
136 |     ).with_config(run_name="FindDocs")
137 |     _context = RunnableMap(
138 |         {
139 |             "context": retriever_chain | format_docs,
140 |             "question": itemgetter("question"),
141 |         }
142 |     ).with_config(run_name="RetrieveDocs")
143 |     # Define the chat prompt template
144 |     prompt = ChatPromptTemplate.from_messages(
145 |         [
146 |             ("system", RESPONSE_TEMPLATE),
147 |             ("human", "{question}"),
148 |         ]
149 |     )
150 | 
151 |     # Create a response synthesizer using the defined prompt
152 |     response_synthesizer = (prompt | llm | StrOutputParser()).with_config(
153 |         run_name="GenerateResponse",
154 |     )
155 |     # Return the final chain of processes
156 |     return (
157 |         {
158 |             "question": RunnableLambda(itemgetter("question")).with_config(
159 |                 run_name="Itemgetter:question"
160 |             ),
161 |         }
162 |         | _context
163 |         | response_synthesizer
164 |     )
165 | 
166 | 
167 | # Retriever
168 | retriever = get_retriever()
169 | 
170 | # Sub-question prompt
171 | sub_question_prompt = hub.pull("hwchase17/code-langchain-sub-question")
172 | 
173 | # Chain for sub-question generation
174 | sub_question_chain = (
175 |     RunnablePassthrough.assign(context=(lambda x: x["question"]) | retriever)
176 |     | sub_question_prompt
177 |     | ChatOpenAI(model="gpt-4-1106-preview")
178 |     | SimpleJsonOutputParser()
179 | )
180 | 
181 | # LLM
182 | llm = ChatOpenAI(
183 |     model="gpt-3.5-turbo-16k",
184 |     streaming=True,
185 |     temperature=0,
186 | )
187 | 
188 | # Chain that answers questions
189 | answer_chain = create_question_anwser_chain(
190 |     llm,
191 |     retriever,
192 | )
193 | 
194 | # Chain for sub-question answering
195 | sub_question_answer_chain = (
196 |     sub_question_chain
197 |     | (lambda x: [{"question": v} for v in x])
198 |     | RunnablePassthrough.assign(answer=answer_chain).map()
199 | )
200 | 
201 | ## Data model
202 | class code(BaseModel):
203 |     """Code output"""
204 |     prefix: str = Field(description="Description of the problem and approach")
205 |     imports: str = Field(description="Code block import statements")
206 |     code: str = Field(description="Code block not including import statements")
207 | 
208 | ## LLM
209 | model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)
210 | 
211 | # Tool
212 | code_tool_oai = convert_to_openai_tool(code)
213 | 
214 | # LLM with tool and enforce invocation
215 | llm_with_tool = model.bind(
216 |     tools=[convert_to_openai_tool(code_tool_oai)],
217 |     tool_choice={"type": "function", "function": {"name": "code"}},
218 | )
219 | 
220 | # Parser
221 | parser_tool = PydanticToolsParser(tools=[code])
222 | 
223 | # Create a prompt template with format instructions and the query
224 | prompt = PromptTemplate(
225 |     template = """You are an expert coder. You got a high level question:
226 | 
227 |     <question>
228 |     {question}
229 |     </question>
230 | 
231 |     Based on this question, you broke it down into sub questions and answered those. These are the results of that:
232 | 
233 |     <subquestions>
234 |     {subq}
235 |     </subquestions>
236 |         
237 |     Ensure any code you provide can be executed with all required imports and variables defined. \n
238 |     Structure your answer with a description of the code solution. \n
239 |     Then list the imports. And finally list the functioning code block. \n
240 |     """,
241 |     input_variables=["question","subq"],
242 | )
243 | 
244 | # Answer chain
245 | chain = (
246 |     RunnablePassthrough().assign(
247 |         subq=sub_question_answer_chain
248 |         | (
249 |             lambda sub_questions_answers: "\n\n".join(
250 |                 [
251 |                     f"Question: {q['question']}\n\nAnswer: {q['answer']}"
252 |                     for q in sub_questions_answers
253 |                 ]
254 |             )
255 |         )
256 |     )
257 |     | prompt
258 |     | llm_with_tool
259 |     | parser_tool
260 | )
261 | 
262 | # Add typing for input
263 | class Question(BaseModel):
264 |     __root__: str
265 | 
266 | chain = chain.with_types(input_type=Question)


--------------------------------------------------------------------------------
/app/test_chains/prompts.py:
--------------------------------------------------------------------------------
 1 | # Answer template
 2 | RESPONSE_TEMPLATE = """\
 3 | You are an expert programmer and problem-solver, tasked with answering any question \
 4 | about Langchain.
 5 | 
 6 | Generate a comprehensive and informative answer of 80 words or less for the \
 7 | given question based solely on the provided search results (URL and content). You must \
 8 | only use information from the provided search results. Use an unbiased and \
 9 | journalistic tone. Combine search results together into a coherent answer. Do not \
10 | repeat text. Cite search results using [${{number}}] notation. Only cite the most \
11 | relevant results that answer the question accurately. Place these citations at the end \
12 | of the sentence or paragraph that reference them - do not put them all at the end. If \
13 | different results refer to different entities within the same name, write separate \
14 | answers for each entity.
15 | 
16 | You should use bullet points in your answer for readability. Put citations where they apply
17 | rather than putting them all at the end.
18 | 
19 | If there is nothing in the context relevant to the question at hand, just say "Hmm, \
20 | I'm not sure." Don't try to make up an answer.
21 | 
22 | Anything between the following `context`  html blocks is retrieved from a knowledge \
23 | bank, not part of the conversation with the user. 
24 | 
25 | <context>
26 |     {context} 
27 | <context/>
28 | 
29 | REMEMBER: If there is no relevant information within the context, just say "Hmm, I'm \
30 | not sure." Don't try to make up an answer. Anything between the preceding 'context' \
31 | html blocks is retrieved from a knowledge bank, not part of the conversation with the \
32 | user.\
33 | """
34 | 
35 | # Stand-alone question template
36 | REPHRASE_TEMPLATE = """\
37 | Given the following conversation and a follow up question, rephrase the follow up \
38 | question to be a standalone question.
39 | 
40 | Chat History:
41 | {chat_history}
42 | Follow Up Input: {question}
43 | Standalone Question:"""
44 | 


--------------------------------------------------------------------------------
/eval/build_eval_set.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "fc4d2f37-d4af-4076-a14f-a004af5fc7da",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Get LCEL-related questions from `chat-langchain`\n",
  9 |     "\n",
 10 |     "We use this to extract chat history from `chat-langchain`.\n",
 11 |     "\n",
 12 |     "We will use this to get LCEL related questions.\n",
 13 |     "\n",
 14 |     "See [here](https://raw.githubusercontent.com/hinthornw/lspopscripts/main/download_runs.py) if you want code the get full traces.\n",
 15 |     "\n",
 16 |     "Set the correct `LANGCHAIN_API_KEY` for `chat-langchain`."
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "id": "c2adea52-baea-4634-a692-f49a1df571c7",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "## Get Questions\n"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 24,
 30 |    "id": "052a5d8c-23cb-47ea-8a52-9c030c3c6a74",
 31 |    "metadata": {},
 32 |    "outputs": [
 33 |     {
 34 |      "name": "stderr",
 35 |      "output_type": "stream",
 36 |      "text": [
 37 |       "152090it [42:48, 59.22it/s]"
 38 |      ]
 39 |     },
 40 |     {
 41 |      "name": "stdout",
 42 |      "output_type": "stream",
 43 |      "text": [
 44 |       "Saved to fetched_data.csv\n"
 45 |      ]
 46 |     },
 47 |     {
 48 |      "name": "stderr",
 49 |      "output_type": "stream",
 50 |      "text": [
 51 |       "\n"
 52 |      ]
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "import datetime\n",
 57 |     "import csv\n",
 58 |     "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
 59 |     "from itertools import islice\n",
 60 |     "\n",
 61 |     "import langsmith\n",
 62 |     "from tqdm import tqdm\n",
 63 |     "\n",
 64 |     "client = langsmith.Client()\n",
 65 |     "\n",
 66 |     "def download_data(\n",
 67 |     "    project_name: str,\n",
 68 |     "    nested: bool = False,\n",
 69 |     "    since: datetime.datetime = yesterday,\n",
 70 |     "    exclude_followups: bool = True,\n",
 71 |     "    filename: str = \"fetched_data.csv\",\n",
 72 |     "):\n",
 73 |     "    \"\"\"\n",
 74 |     "    Downloads and saves data from Langsmith runs to a CSV file.\n",
 75 |     "\n",
 76 |     "    This function retrieves run data from the Langsmith project specified by 'project_name'.\n",
 77 |     "    It extracts 'question' and 'output' from each run's inputs and outputs, respectively,\n",
 78 |     "    and saves them into a CSV file. The function can handle both nested and non-nested runs.\n",
 79 |     "    Follow-up runs can be excluded if desired.\n",
 80 |     "\n",
 81 |     "    Parameters:\n",
 82 |     "    project_name (str): The name of the Langsmith project to retrieve data from.\n",
 83 |     "    nested (bool): Set to True to handle nested runs; False by default.\n",
 84 |     "    since (datetime): The start time from which to retrieve runs; defaults to yesterday.\n",
 85 |     "    exclude_followups (bool): Set to True to exclude follow-up runs; True by default.\n",
 86 |     "    filename (str): The name of the file to save the data to; defaults to 'fetched_data.csv'.\n",
 87 |     "    \"\"\"\n",
 88 |     "    traces = client.list_runs(\n",
 89 |     "        project_name=project_name, start_time=since, execution_order=1\n",
 90 |     "    )\n",
 91 |     "    batch_size = 10\n",
 92 |     "    executor = ThreadPoolExecutor(max_workers=batch_size) if nested else None\n",
 93 |     "\n",
 94 |     "    with open(filename, 'w', newline='', encoding='utf-8') as file_handle:\n",
 95 |     "        csv_writer = csv.writer(file_handle)\n",
 96 |     "        # Write the header\n",
 97 |     "        csv_writer.writerow(['question', 'output'])\n",
 98 |     "\n",
 99 |     "        try:\n",
100 |     "            if nested:\n",
101 |     "                pbar = tqdm()\n",
102 |     "                while True:\n",
103 |     "                    batch = list(islice(traces, batch_size))\n",
104 |     "                    if not batch:\n",
105 |     "                        break\n",
106 |     "                    futures = [\n",
107 |     "                        executor.submit(client.read_run, run.id, load_child_runs=True)\n",
108 |     "                        for run in batch\n",
109 |     "                    ]\n",
110 |     "                    for future in as_completed(futures):\n",
111 |     "                        loaded_run = future.result()\n",
112 |     "                        loaded_run_json=loaded_run.json()\n",
113 |     "                        loaded_run_json = json.loads(loaded_run_json)\n",
114 |     "                        question = loaded_run_json['inputs'].get('question', '')\n",
115 |     "                        output = loaded_run_json['outputs'].get('output', '')\n",
116 |     "                        csv_writer.writerow([question, output])\n",
117 |     "                    pbar.update(len(batch))\n",
118 |     "            else:\n",
119 |     "                for run in tqdm(traces):\n",
120 |     "                    if exclude_followups and run.inputs.get(\"chat_history\"):\n",
121 |     "                        continue\n",
122 |     "                    run_json = run.json()\n",
123 |     "                    run_json = json.loads(run_json)\n",
124 |     "                    question = run_json['inputs'].get('question', '')\n",
125 |     "                    output = run_json['outputs'].get('output', '')\n",
126 |     "                    csv_writer.writerow([question, output])\n",
127 |     "\n",
128 |     "        finally:\n",
129 |     "            if executor:\n",
130 |     "                executor.shutdown()\n",
131 |     "    \n",
132 |     "    print(f\"Saved to {filename}\")\n",
133 |     "\n",
134 |     "# Call the function\n",
135 |     "yesterday = datetime.datetime.now() - datetime.timedelta(days=1)\n",
136 |     "window_30_day = datetime.datetime.now() - datetime.timedelta(days=30)\n",
137 |     "download_data(project_name=\"chat-langchain\",\n",
138 |     "              since=window_30_day)"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "id": "a9592ccc-5776-4e1d-b062-f3264929e023",
144 |    "metadata": {},
145 |    "source": [
146 |     "## Read Extracted QA Pairs"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 1,
152 |    "id": "c3406211-321a-40ff-8c9d-9ba48d83da23",
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": [
156 |     "import pandas as pd\n",
157 |     "filename = 'fetched_data.csv'\n",
158 |     "df = pd.read_csv(filename)"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "markdown",
163 |    "id": "68f2e2d3-b2b3-45cd-a253-c48e19936ff7",
164 |    "metadata": {},
165 |    "source": [
166 |     "## Filter for LCEL in the question"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 3,
172 |    "id": "93d4415c-e916-4b5c-9fd0-8c86a034675c",
173 |    "metadata": {},
174 |    "outputs": [],
175 |    "source": [
176 |     "from langchain.schema import Document\n",
177 |     "\n",
178 |     "search_term = 'LCEL'\n",
179 |     "filtered_df = df[df['question'].str.contains(search_term, case=False, na=False)]\n",
180 |     "\n",
181 |     "# Group by unique instances of 'question' and then reset index\n",
182 |     "unique_questions_df = filtered_df.drop_duplicates(subset='question')\n",
183 |     "\n",
184 |     "# Extract the 'question' column and convert it to a list\n",
185 |     "unique_questions = unique_questions_df['question'].tolist()"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "markdown",
190 |    "id": "f037e6e0-6f04-41a1-971f-7843bcb36d51",
191 |    "metadata": {},
192 |    "source": [
193 |     "## Cluster\n",
194 |     "\n",
195 |     "Some of the questions are highly verbose and contain large code blocks.\n",
196 |     "\n",
197 |     "Let's try to cluster so these types are questions are grouped (and can be most easily ignored)."
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": null,
203 |    "id": "7d3a67c4-c33e-42d4-9be7-25694e2b345c",
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": [
207 |     "# Embed and cluster \n",
208 |     "\n",
209 |     "from langchain.embeddings.openai import OpenAIEmbeddings\n",
210 |     "embd = OpenAIEmbeddings()\n",
211 |     "question_embeddings = embd.embed_documents(unique_questions)\n",
212 |     "\n",
213 |     "from sklearn.cluster import KMeans\n",
214 |     "clustering_model = KMeans(n_clusters=5, random_state=0)\n",
215 |     "clusters = clustering_model.fit_predict(question_embeddings)\n",
216 |     "unique_questions_df['cluster'] = clusters\n",
217 |     "\n",
218 |     "def fmt_qus(df):\n",
219 |     "\n",
220 |     "    unique_questions = df['question'].tolist()\n",
221 |     "    formatted_unique_questions = '--- --- \\n --- --- '.join(unique_questions)\n",
222 |     "    return formatted_unique_questions\n",
223 |     "\n",
224 |     "# Get unique values in the 'cluster' column\n",
225 |     "all_clusters = unique_questions_df['cluster'].unique()\n",
226 |     "\n",
227 |     "# Process each cluster\n",
228 |     "cluster_context=[]\n",
229 |     "for i in all_clusters:\n",
230 |     "    df_cluster = unique_questions_df[unique_questions_df['cluster'] == i]\n",
231 |     "    formatted_questions = fmt_qus(df_cluster)\n",
232 |     "    cluster_context.append(formatted_questions)"
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "markdown",
237 |    "id": "7d0884c6-1b3d-49f2-b247-f7e9854b598d",
238 |    "metadata": {},
239 |    "source": [
240 |     "## Summarize\n",
241 |     "\n",
242 |     "Summarize major question themes in each cluster.\n",
243 |     "\n",
244 |     "This isolates lower quality / more vebose questions into its own cluster, limiting pollution of the overall themes."
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "code",
249 |    "execution_count": null,
250 |    "id": "c8001a4c-c697-4f85-97fc-85c459d7a4fa",
251 |    "metadata": {},
252 |    "outputs": [],
253 |    "source": [
254 |     "from langchain.chat_models import ChatOpenAI\n",
255 |     "from langchain.prompts import ChatPromptTemplate\n",
256 |     "from langchain_core.output_parsers import StrOutputParser\n",
257 |     "\n",
258 |     "# Prompt template\n",
259 |     "template = \"\"\"Here is a set of questions input to LangChain QA system. \\n\n",
260 |     "\n",
261 |     "They are related to LCEL, LangChain Expression Language. \\n\n",
262 |     "\n",
263 |     "Reason about the questions, first. \\n\n",
264 |     "\n",
265 |     "Then, give me a list of the top 10 question themes.\n",
266 |     "\n",
267 |     "Give me one reprentitive question per theme.\n",
268 |     "\n",
269 |     "Questions:\n",
270 |     "{context}\n",
271 |     "\"\"\"\n",
272 |     "prompt = ChatPromptTemplate.from_template(template)\n",
273 |     "model = ChatOpenAI(temperature=0, model=\"gpt-4-1106-preview\")\n",
274 |     "chain = prompt | model | StrOutputParser()\n",
275 |     "\n",
276 |     "answers = []\n",
277 |     "for c in cluster_context:\n",
278 |     "    answers.append(chain.invoke({\"context\":c}))"
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "markdown",
283 |    "id": "5b14a647-9d0f-4442-9963-566558c45ed8",
284 |    "metadata": {},
285 |    "source": [
286 |     "## Themes\n",
287 |     "\n",
288 |     "We can look at the group summaries in LangSmith.\n",
289 |     "\n",
290 |     "* https://smith.langchain.com/public/69d0b729-cd8c-4d4b-859d-6e5ee683fc7a/r\n",
291 |     "\n",
292 |     "```\n",
293 |     "1. **Basic Understanding of LCEL**\n",
294 |     "   - What is LCEL?\n",
295 |     "\n",
296 |     "2. **LCEL Integration with Agents**\n",
297 |     "   - Can I use agents with LCEL?\n",
298 |     "\n",
299 |     "3. **LCEL Coding and Implementation Examples**\n",
300 |     "   - Code me a question answering example with LCEL.\n",
301 |     "\n",
302 |     "4. **LCEL with Memory and Storage**\n",
303 |     "   - How to use VectorStoreRetrieverMemory in LCEL?\n",
304 |     "\n",
305 |     "5. **LCEL Configuration and Settings**\n",
306 |     "   - How to set verbose true for LCEL?\n",
307 |     "\n",
308 |     "6. **LCEL with Retrieval-Augmented Generation (RAG)**\n",
309 |     "   - Can you give me an example to run a simple RAG using LCEL in Python?\n",
310 |     "\n",
311 |     "7. **LCEL Asynchronous Operations**\n",
312 |     "   - LCEL 异步invoke (LCEL asynchronous invoke)\n",
313 |     "\n",
314 |     "8. **LCEL Error Handling and Debugging**\n",
315 |     "   - How can I get the finish reason using LCEL?\n",
316 |     "\n",
317 |     "9. **LCEL with Multiple Inputs and Variables**\n",
318 |     "   - How to use multiple partial variables in LCEL?\n",
319 |     "\n",
320 |     "10. **LCEL Advanced Features and Customization**\n",
321 |     "   - Can LCEL execute custom python functions?\n",
322 |     "```"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "markdown",
327 |    "id": "42c0186a-3336-4297-b5d7-812f4be2c5fb",
328 |    "metadata": {},
329 |    "source": [
330 |     "* https://smith.langchain.com/public/b3aba7b6-e877-4d99-bede-e607138fe171/r\n",
331 |     "\n",
332 |     "```\n",
333 |     "1. **Parallel and Asynchronous Execution**: Questions about running multiple chains in parallel or asynchronously.\n",
334 |     "   - Representative question: \"I want to run three chains in parallel. They share the same input variables, but produce different output objects. How do I do this with LCEL?\"\n",
335 |     "\n",
336 |     "2. **Custom Functions and Configurations**: How to include custom functions or add configurable fields to a chain.\n",
337 |     "   - Representative question: \"How to include a custom function as part of an LCEL chain?\"\n",
338 |     "\n",
339 |     "3. **Memory Management**: Questions about how memory is handled within chains, including buffer memory and conversation memory.\n",
340 |     "   - Representative question: \"I have a LCEL chain with e.g. buffer memory, and I serve it via Langserve. When is the memory reset? Do all API calls use the same memory under the hood?\"\n",
341 |     "\n",
342 |     "4. **Chain Composition and Modularity**: How to compose chains from multiple components or steps, and how to pass data between them.\n",
343 |     "   - Representative question: \"How can I connect several chains, i.e. the output of the former chain is the input of the latter chain? Can I achieve this through LCEL?\"\n",
344 |     "\n",
345 |     "5. **Error Handling and Retries**: How to handle errors and implement retries within a chain.\n",
346 |     "   - Representative question: \"How can I use a RetryWithErrorOutputParser in a LCEL chain?\"\n",
347 |     "\n",
348 |     "6. **Retrieval and Querying**: Questions about setting up retrieval chains, including those with specific querying capabilities.\n",
349 |     "   - Representative question: \"How to create a Retrieval QA chain with streaming, using LCEL?\"\n",
350 |     "\n",
351 |     "7. **Verbose and Debugging**: How to enable verbose output or debugging within a chain.\n",
352 |     "   - Representative question: \"How to set verbose True in LangChain Expression Language (LCEL)?\"\n",
353 |     "\n",
354 |     "8. **Integration with External Services**: Questions about integrating LCEL chains with external services or databases.\n",
355 |     "   - Representative question: \"I need a LCEL chain that takes a YouTube link and transcribes it with Whisper.\"\n",
356 |     "\n",
357 |     "9. **Chain Customization and Enhancement**: How to enhance chains with additional features like callbacks, custom parsers, or specific output formatting.\n",
358 |     "   - Representative question: \"How do I pass a pre-written history variable into my LCEL chain?\"\n",
359 |     "\n",
360 |     "10. **Understanding LCEL Fundamentals**: Basic questions about what LCEL is and how to use it effectively.\n",
361 |     "    - Representative question: \"What is LangChain Expression Language (LCEL)?\"\n",
362 |     "```"
363 |    ]
364 |   },
365 |   {
366 |    "cell_type": "markdown",
367 |    "id": "2ec53ff1-40fc-4986-b799-bf1bdd90dbaa",
368 |    "metadata": {},
369 |    "source": [
370 |     "* https://smith.langchain.com/public/3346bc94-146e-451a-9af4-7b7fe07d9a84/r\n",
371 |     "\n",
372 |     "```\n",
373 |     "1. **LCEL Chain Construction**: How to build and structure chains using LCEL components.\n",
374 |     "   - Representative question: \"Give an example of an LCEL chain with LLMSingleActionAgent and AgentExecutor.\"\n",
375 |     "\n",
376 |     "2. **Output Parsing and Formatting**: How to parse and format the output from LCEL chains.\n",
377 |     "   - Representative question: \"What LangChain tool can I use to parse this output into a single message?\"\n",
378 |     "\n",
379 |     "3. **Component Ordering and Interaction**: Understanding the order and interaction between components in an LCEL chain.\n",
380 |     "   - Representative question: \"When using LCEL, is the order of the chained components arbitrary?\"\n",
381 |     "\n",
382 |     "4. **Custom Agents and Tools Integration**: How to integrate custom agents and tools within an LCEL chain.\n",
383 |     "   - Representative question: \"I would like to use my own custom agent in an LCEL chain. How do I build this chain?\"\n",
384 |     "\n",
385 |     "5. **Conditional Logic and Prompts**: Implementing conditional logic and handling prompts in LCEL.\n",
386 |     "   - Representative question: \"How to conditionally choose between prompts in LCEL.\"\n",
387 |     "\n",
388 |     "6. **Memory and Conversation History**: Utilizing memory and conversation history within LCEL chains.\n",
389 |     "   - Representative question: \"Conversation chain with memory using LCEL.\"\n",
390 |     "\n",
391 |     "7. **Runnable and Agent Configuration**: Configuring and using Runnables and agents in LCEL.\n",
392 |     "   - Representative question: \"How do I configure ReAct agent 'Thought' with custom OutputParser and Custom Agent, using LCEL?\"\n",
393 |     "\n",
394 |     "8. **LCEL Syntax and Expressions**: Understanding and using the syntax and expressions specific to LCEL.\n",
395 |     "   - Representative question: \"Can I create an LCEL chain with prompt templates having no input variables?\"\n",
396 |     "\n",
397 |     "9. **LCEL with Specific Models and Tools**: Using LCEL with specific models like GPT-4 and tools like vectorstore retrievers.\n",
398 |     "   - Representative question: \"Can you show me an example of an agent using gpt4 with a web search tool and memory? All using LCEL.\"\n",
399 |     "\n",
400 |     "10. **LCEL in Different Environments and Applications**: Applying LCEL in various environments and for different types of applications.\n",
401 |     "    - Representative question: \"Provide LCEL code for a simple chat app using Azure OpenAI.\"\n",
402 |     "```"
403 |    ]
404 |   },
405 |   {
406 |    "cell_type": "markdown",
407 |    "id": "7d214d20-e3f6-4c81-a32b-c30f4f15b8b8",
408 |    "metadata": {},
409 |    "source": [
410 |     "* https://smith.langchain.com/public/47648f18-b543-477c-91cf-84d612eb6810/r\n",
411 |     "\n",
412 |     "```\n",
413 |     "1. **Error Handling in LCEL Chains**\n",
414 |     "   - Representative Question: \"This code gives me this error TypeError: Expected a Runnable, callable or dict. Instead got an unsupported type: <class 'str'>\"\n",
415 |     "\n",
416 |     "2. **Integration of LangChain with AI Models**\n",
417 |     "   - Representative Question: \"Create the LCEL chain using ChatOpenAI with a specific model and temperature settings.\"\n",
418 |     "\n",
419 |     "3. **PDF Processing with PyMuPDF**\n",
420 |     "   - Representative Question: \"Convert a PDF page to a pixmap using the PyMuPDF library.\"\n",
421 |     "\n",
422 |     "4. **Base64 Encoding of Images**\n",
423 |     "   - Representative Question: \"Encode a pixmap to a base64 string for image processing.\"\n",
424 |     "\n",
425 |     "5. **Template Formatting and Data Injection**\n",
426 |     "   - Representative Question: \"Define the prompt templates and format them with dynamic data for the AI model.\"\n",
427 |     "\n",
428 |     "6. **AI-Assisted Data Interpretation**\n",
429 |     "   - Representative Question: \"Use the AI model to assist in marking images using a provided mark scheme.\"\n",
430 |     "\n",
431 |     "7. **File I/O Operations**\n",
432 |     "   - Representative Question: \"Write the results of the LCEL chain to a file.\"\n",
433 |     "\n",
434 |     "8. **Debugging Lambda Functions in LCEL**\n",
435 |     "   - Representative Question: \"Change the RunnableLambda to RunnablePassthrough from the start of the template.\"\n",
436 |     "\n",
437 |     "9. **Understanding LCEL Chain Outputs**\n",
438 |     "   - Representative Question: \"What will be the type of marking_output and how to make it a string?\"\n",
439 |     "\n",
440 |     "10. **Correct Usage of LCEL Components**\n",
441 |     "    - Representative Question: \"The output of the LCEL chain isn't a string; find a way to fix it.\"\n",
442 |     "```"
443 |    ]
444 |   },
445 |   {
446 |    "cell_type": "markdown",
447 |    "id": "fab392a3-9455-4207-8dc9-c996bddee834",
448 |    "metadata": {},
449 |    "source": [
450 |     "We do some manual curation, and put our final question set into `eval/eval.csv`."
451 |    ]
452 |   }
453 |  ],
454 |  "metadata": {
455 |   "kernelspec": {
456 |    "display_name": "Python 3 (ipykernel)",
457 |    "language": "python",
458 |    "name": "python3"
459 |   },
460 |   "language_info": {
461 |    "codemirror_mode": {
462 |     "name": "ipython",
463 |     "version": 3
464 |    },
465 |    "file_extension": ".py",
466 |    "mimetype": "text/x-python",
467 |    "name": "python",
468 |    "nbconvert_exporter": "python",
469 |    "pygments_lexer": "ipython3",
470 |    "version": "3.11.4"
471 |   }
472 |  },
473 |  "nbformat": 4,
474 |  "nbformat_minor": 5
475 | }
476 | 


--------------------------------------------------------------------------------
/eval/create_dataset.py:
--------------------------------------------------------------------------------
 1 | import langsmith
 2 | 
 3 | if __name__ == "__main__":
 4 |     client = langsmith.Client()
 5 |     client.upload_csv(
 6 |         csv_file="eval/eval.csv",
 7 |         input_keys=["question"],
 8 |         output_keys=["answer"],
 9 |         name="lcel-teacher-eval",
10 |     )
11 | 


--------------------------------------------------------------------------------
/eval/eval.csv:
--------------------------------------------------------------------------------
 1 | question,answer
 2 | "How can I use a prompt and model to create a chain in LCEL that returns raw ChatMessages?","Here is an example of a prompt and LLM chain using LCEL without any output parsing to return ChatMessages: \nfrom langchain.chat_models import ChatOpenAI\nfrom langchain.prompts import ChatPromptTemplate\n\nprompt = ChatPromptTemplate.from_template(\'tell me a joke about {foo}\')\nmodel = ChatOpenAI()\nchain = prompt | model\n\nchain.invoke({\'foo\': \'bears\'})"
 3 | "How can I add memory to an arbitrary chain using LCEL?","Here is an example adding memory to a chain: \nfrom operator import itemgetter\n\nfrom langchain.chat_models import ChatOpenAI\nfrom langchain.memory import ConversationBufferMemory\nfrom langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\nfrom langchain_core.runnables import RunnableLambda, RunnablePassthrough\n\nmodel = ChatOpenAI()\nprompt = ChatPromptTemplate.from_messages(\n[\n(\'system\',\'You are a helpful chatbot\'),\n MessagesPlaceholder(variable_name=\'history\'),\n('human', '{input}'),\n    ]\n)\n\nmemory = ConversationBufferMemory(return_messages=True)\n\nchain = (\n    RunnablePassthrough.assign(\n        history=RunnableLambda(memory.load_memory_variables) | itemgetter('history')\n    )\n    | prompt\n    | model\n)\n\ninputs = {'input': 'hi im bob'}\nresponse = chain.invoke(inputs)"
 4 | "I've defined a LCEL runnable chain = prompt | model. How can I look at the input schema?","All runnables expose input and output schemas to inspect the inputs and outputs. input_schema is an input Pydantic model auto-generated from the structure of the Runnable. You can call .schema() on it to obtain a JSONSchema representation of any runnable: # The input schema of the chain is the input schema of its first part, the prompt. chain.input_schema.schema()"
 5 | "I have a LCEL runnable, chain, and am passing in a map w/ {'question' 'where did harrison work', 'language': 'italian'}. How can I extract the value of 'language' to pass to my prompt?","For this example, we can use itemgetter to extract specific values from the map. Here is an example: from operator import itemgetter from langchain.chat_models import ChatOpenAI from langchain.embeddings import OpenAIEmbeddings from langchain.prompts import ChatPromptTemplate from langchain.vectorstores import FAISS from langchain_core.output_parsers import StrOutputParser from langchain_core.runnables import RunnablePassthrough vectorstore = FAISS.from_texts(['harrison worked at kensho'], embedding=OpenAIEmbeddings()) retriever = vectorstore.as_retriever() template = 'Answer the question based only on the following context: {context} Question: {question} Answer in the following language: {language}' prompt = ChatPromptTemplate.from_template(template) chain = ( { 'context': itemgetter('question') | retriever, 'question': itemgetter('question'), 'language': itemgetter('language'), } | prompt | model | StrOutputParser() ) chain.invoke({'question': 'where did harrison work', 'language': 'italian'})"
 6 | "I am passing text key 'foo' to my prompt and want to process it with a function, process_text(...), prior to the prompt. How can I do this using LCEL?","You can use a RunnableLambda to apply a function to the value of foo: chain = ( { 'a': itemgetter('foo') | RunnableLambda(process_text), | RunnableLambda(multiple_length_function), } | prompt | model )"
 7 | "My LCEL map contains the key 'question'. What is the difference between using itemgetter('question'), lambda x: x['question'], and x.get('question')?","Itemgetter can be used as shorthand to extract specific keys from the map. In the context of a map operation, the lambda function is applied to each element in the input map and the function returns the value associated with the key 'question'. (get) is safer for accessing values in a dictionary because it handles the case where the key might not exist."
 8 | "I'm invoking a LCEL chain with a map that contain {'question': 'how do I use Anthropic?'}. The full chain definition is full_chain = {'question': lambda x: x['question']} | sub_chain. Why is a lambda used?","The lambda function is an anonymous function that takes one argument, x, and returns x['question']. In the context of a map operation, this function is applied to each element in the input iterable. If the input is a dictionary (map), as in this case, x would be this map, and the function returns the value associated with the key 'question'."
 9 | "I’m passing {'a':1}  and want to create an output map of {'a':1,'b':2, 'c':3}. How can I do this in LCEL?","Use RunnablePassthrough: Use RunnableParallel with lambdas: \n from langchain_core.runnables import RunnableParallel, RunnablePassthrough; runnable = RunnableParallel(a=lambda x: x['a'], b=lambda x: x['a']+1, c=lambda x: x['a']+2); runnable.invoke({'num': 1}). Also you can use RunnablePassthrough: from langchain_core.runnables import RunnablePassthrough; original_input = {'a': 1}; chain = RunnablePassthrough.assign(b=lambda x: 2, c=lambda x: 3); output = chain.invoke(original_input); print(output)"
10 | "How can I make the output of my LCEL chain a string?","Use StrOutputParser. from langchain_openai import ChatOpenAI; from langchain_core.prompts import ChatPromptTemplate; from langchain_core.output_parsers import StrOutputParser; prompt = ChatPromptTemplate.from_template('Tell me a short joke about {topic}'); model = ChatOpenAI(model='gpt-3.5-turbo') #gpt-4 or other LLMs can be used here; output_parser = StrOutputParser(); chain = prompt | model | output_parser"
11 | "How can I apply a custom function to one of the inputs of an LCEL chain?","Use RunnableLambda with itemgetter to extract the relevant key. from operator import itemgetter; from langchain_core.prompts import ChatPromptTemplate; from langchain_core.runnables import RunnableLambda; from langchain_openai import ChatOpenAI; def length_function(text): return len(text); chain = ({'prompt_input': itemgetter('foo') | RunnableLambda(length_function),} | prompt | model); chain.invoke({'foo':'hello world'})"
12 | "With a RAG chain in LCEL, why are documents retrieved automatically when we construct the prompt like {'context': retriever, 'question': RunnablePassthrough()} and invoke it using chain.invoke('where did harrison work?')","When we create the chain, get_relevant_documents is invoked automatically. vectorstore = FAISS.from_texts(['harrison worked at kensho'], embedding=OpenAIEmbeddings()); retriever = vectorstore.as_retriever(); chain = ({'context': retriever, 'question': RunnablePassthrough()} | prompt | model | StrOutputParser()); chain.invoke('where did harrison work?')"
13 | "I am passing a map with {'num': 1} to a LCEL chain. How can I add an extra key num2 to this map that adds 1 to the value of num. Then I want to assign this new map to a new key named output?","We can use RunnablePassthrough with a lambda function. from langchain_core.runnables import RunnableParallel, RunnablePassthrough; runnable = RunnableParallel(output=RunnablePassthrough.assign(num2=lambda x: x['num'] + 1),); runnable.invoke({'num': 1})"
14 | "How can I configure the temperature of an LLM when invoking the LCEL chain?","Use configuration fields. from langchain.prompts import PromptTemplate; from langchain_core.runnables import ConfigurableField; from langchain_openai import ChatOpenAI; model = ChatOpenAI(temperature=0).configurable_fields(temperature=ConfigurableField(id='llm_temperature', name='LLM Temperature', description='The temperature of the LLM', )); model.with_config(configurable={'llm_temperature': 0.9}).invoke('pick a random number')"
15 | "How can we apply a function call to an LLM in an LCEL chain?","We can attach a function call to the model using bind: functions = [{'name': 'joke', 'description': 'A joke', 'parameters': {'type': 'object', 'properties': {'setup': {'type': 'string', 'description': 'The setup for the joke'}, 'punchline': {'type': 'string', 'description': 'The punchline for the joke'}}, 'required': ['setup', 'punchline']}]; chain = prompt | model.bind(function_call={'name': 'joke'}, functions=functions)"
16 | "How can I run two LCEL chains in parallel and write their output to a map?","We can use RunnableParallel: from langchain_core.prompts import ChatPromptTemplate; from langchain_core.runnables import RunnableParallel; from langchain_openai import ChatOpenAI; model = ChatOpenAI(); joke_chain = ChatPromptTemplate.from_template('tell me a joke about {topic}') | model; poem_chain = (ChatPromptTemplate.from_template('write a 2-line poem about {topic}') | model); map_chain = RunnableParallel(joke=joke_chain, poem=poem_chain); map_chain.invoke({'topic': 'bear'})"
17 | "How can I directly pass a string to a runnable and use it to construct the input needed for my prompt?","Use RunnablePassthrough. from langchain_core.runnables import RunnableParallel, RunnablePassthrough; from langchain_core.prompts import ChatPromptTemplate; from langchain_openai import ChatOpenAI; prompt = ChatPromptTemplate.from_template('Tell a joke about: {input}'); model = ChatOpenAI(); runnable = ({'input' : RunnablePassthrough()} | prompt | model); runnable.invoke('flowers')"
18 | "How can I use a custom function to route between 2 chains in LCEL?","Use a RunnableLambda with custom routing logic. from langchain.prompts import PromptTemplate; from langchain_community.chat_models import ChatAnthropic; from langchain_core.output_parsers import StrOutputParser; chain = (PromptTemplate.from_template('Given the user question below, classify it as either being about `LangChain`, `Anthropic`, or `Other`. Do not respond with more than one word. <question> {question} </question> Classification:') | ChatAnthropic() | StrOutputParser()); def route(info): if 'anthropic' in info['topic'].lower(): return anthropic_chain; elif 'langchain' in info['topic'].lower(): return langchain_chain; langchain_chain = (PromptTemplate.from_template('You are an expert in langchain. Always answer questions starting with 'As Harrison Chase told me'. Respond to the following question: Question: {question} Answer:') | ChatAnthropic()); anthropic_chain = (PromptTemplate.from_template('You are an expert in anthropic. Always answer questions starting with 'As Dario Amodei told me'. Respond to the following question: Question: {question} Answer:') | ChatAnthropic()); from langchain_core.runnables import RunnableLambda; full_chain = {'topic': chain, 'question': lambda x: x['question']} | RunnableLambda(route)"
19 | "How do I set up a retrieval-augmented generation chain using LCEL that accepts a string as input?","Use RunnablePassthrough to pass the input to the retriever and build a prompt to pass to the LLM: ! pip install langchain langchain-openai faiss-cpu; from operator import itemgetter; from langchain_community.vectorstores import FAISS; from langchain_core.output_parsers import StrOutputParser; from langchain_core.prompts import ChatPromptTemplate; from langchain_core.runnables import RunnablePassthrough; from langchain_openai import ChatOpenAI, OpenAIEmbeddings; vectorstore = FAISS.from_texts(['harrison worked at kensho'], embedding=OpenAIEmbeddings()); retriever = vectorstore.as_retriever(); template = 'Answer the question based only on the following context:{context}Question: {question}'; prompt = ChatPromptTemplate.from_template(template); model = ChatOpenAI(); chain = ({'context': retriever, 'question': RunnablePassthrough()} | prompt | model | StrOutputParser()); response = chain.invoke('where did harrison work?'); print(response)"
20 | "How can I create a LCEL chain that queries a SQL database?","Follow these steps to create an LCEL chain that can query a SQL DB: from langchain_core.prompts import ChatPromptTemplate; template = 'Based on the table schema below, write a SQL query that would answer the user's question: {schema} Question: {question} SQL Query:'; prompt = ChatPromptTemplate.from_template(template); from langchain_community.utilities import SQLDatabase; db = SQLDatabase.from_uri('sqlite:///./Chinook.db'); def get_schema(_): return db.get_table_info(); def run_query(query): return db.run(query); from langchain_core.output_parsers import StrOutputParser; from langchain_core.runnables import RunnablePassthrough; from langchain_openai import ChatOpenAI; model = ChatOpenAI(); sql_response = (RunnablePassthrough.assign(schema=get_schema) | prompt | model.bind(stop=['\nSQLResult:']) | StrOutputParser()); template = 'Based on the table schema below, question, sql query, and sql response, write a natural language response: {schema} Question: {question} SQL Query: {query} SQL Response: {response}'; prompt_response = ChatPromptTemplate.from_template(template); full_chain = (RunnablePassthrough.assign(query=sql_response).assign(schema=get_schema, response=lambda x: db.run(x['query']), ) | prompt_response | model); full_chain.invoke({'question': 'How many employees are there?'})"
21 | "How to structure output of an LCEL chain as a Pydantic object with prefix and code_block?","We can use PydanticOutputParser. from langchain.output_parsers import PydanticOutputParser; from langchain.prompts import PromptTemplate; from langchain.pydantic_v1 import BaseModel, Field; class FunctionOutput(BaseModel): prefix: str = Field(description='The prefix of the output'); code_block: str = Field(description='The code block of the output'); parser = PydanticOutputParser(pydantic_object=FunctionOutput); format_instructions = parser.get_format_instructions(); prompt = PromptTemplate(template='Output format instructions:\n{format_instructions}\n\nQuery: {query}\n', input_variables=['query'], partial_variables={'format_instructions': format_instructions}); model = ChatOpenAI(model_name='gpt-4'); prompt_and_model = prompt | model; output = prompt_and_model.invoke({'query': 'Give me a function that will add two numbers:'}); parsed_output = parser.invoke(output); prefix = parsed_output.prefix; code_block = parsed_output.code_block"


--------------------------------------------------------------------------------
/eval/test_chain.py:
--------------------------------------------------------------------------------
 1 | import langsmith
 2 | import langsmith.env
 3 | from app.context_stuffing_chain import chain as context_stuffing
 4 | from langchain.smith import RunEvalConfig
 5 | import uuid
 6 | 
 7 | 
 8 | if __name__ == "__main__":
 9 |     client = langsmith.Client()
10 |     git_info = langsmith.env.get_git_info()
11 |     branch, commit = git_info["branch"], git_info["commit"]
12 |     project_name = f"lcel-teacher-{branch}-{commit[:4]}-{uuid.uuid4().hex[:4]}"
13 |     eval_config = RunEvalConfig(
14 |         evaluators=["qa"],
15 |     )
16 |     test_results = client.run_on_dataset(
17 |         dataset_name="lcel-teacher-eval",
18 |         llm_or_chain_factory=lambda: (lambda x: x["question"]) | context_stuffing,
19 |         project_name=project_name,
20 |         evaluation=eval_config,
21 |         verbose=True,
22 |         project_metadata={"context": "regression-tests"},
23 |     )
24 |     test_results.get_aggregate_feedback()
25 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "code-langchain-app"
 3 | version = "0.1.0"
 4 | description = ""
 5 | authors = ["Lance Martin <lance@langchain.dev>"]
 6 | readme = "README.md"
 7 | 
 8 | [tool.poetry.dependencies]
 9 | python = "^3.11"
10 | uvicorn = "^0.23.2"
11 | langserve = {extras = ["server"], version = ">=0.0.22"}
12 | weaviate-client = "^3.26.0"
13 | langchainhub = "^0.1.14"
14 | jupyter = "^1.0.0"
15 | openai = "^1.6.0"
16 | langchain = "~0.1.0"
17 | pandas = "^2.1.4"
18 | tiktoken = "^0.5.2"
19 | scikit-learn = "^1.4.0"
20 | langchain-openai = "^0.0.4"
21 | 
22 | [tool.poetry.group.dev.dependencies]
23 | langchain-cli = ">=0.0.15"
24 | pandas = "^2.1.4"
25 | 
26 | [build-system]
27 | requires = ["poetry-core"]
28 | build-backend = "poetry.core.masonry.api"


--------------------------------------------------------------------------------