├── .env.example ├── .gitignore ├── AWS └── Bedrock │ ├── llm.py │ ├── poetry.lock │ ├── pyproject.toml │ └── rag.py ├── Analysis-and-Comparison-between-Optimism-and-StarkNet.pdf ├── AutoGPT ├── .env.example ├── AutoGPT.py ├── README.md └── requirements.txt ├── AutoGPT_with_LangChain_Primitives.ipynb ├── How_OpenAI_Count_Tokens.ipynb ├── LangChain_AI_Image_Recognition.ipynb ├── LangChain_APIChain.ipynb ├── LangChain_Caching.ipynb ├── LangChain_ChatGithub.ipynb ├── LangChain_ChatOpenAI_OpenAI_Diff.ipynb ├── LangChain_Extraction.ipynb ├── LangChain_Gmail.ipynb ├── LangChain_Google_Gemini_API.ipynb ├── LangChain_LLM_Math.ipynb ├── LangChain_OpenAI_Function_Calling.ipynb ├── LangChain_Output_Parsing.ipynb ├── LangChain_PDF_Chatbot.ipynb ├── LangChain_ParentDocumentRetriever.ipynb ├── LangChain_Pinecone_Serverless.ipynb ├── LangChain_SQLDatabaseChain_Vulnerability.ipynb ├── LangChain_Spark_AI.ipynb ├── LangChain_TextSplitter.ipynb ├── Langchain_HuggingFacePipeline.ipynb ├── Langchain_Memory_Persistent_Store.ipynb ├── OpenAI_Chat_Completions_16k.ipynb ├── README.md ├── StreamChat ├── app.py ├── requirements.txt ├── server.py └── statics │ └── index.html ├── coffee-roll.jpg ├── expression-language ├── LangChain_Expression_03_Router.ipynb ├── LangChain_Expression_Language.ipynb └── LangChain_Expression_Language_Runnable.ipynb ├── langchain_0_2_chat_model_one_line.ipynb ├── langchain_anthropic_contextual_retrieval.ipynb ├── langchain_firecrawl.ipynb ├── langchain_nomic_embedding.ipynb ├── langchain_openai_gpt4o.ipynb ├── langchain_supabase_rag.ipynb └── langgraph_nodes_edges.ipynb /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=1234567890 2 | SERPAPI_API_KEY=0987654321 3 | HUGGINGFACEHUB_API_TOKEN=9999999999 4 | 5 | GOOGLE_API_KEY=1234567890 -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | .env 2 | */**/.env 3 | */**/venv 4 | */**/.venv -------------------------------------------------------------------------------- /AWS/Bedrock/llm.py: -------------------------------------------------------------------------------- 1 | from langchain.llms import Bedrock 2 | from langchain.chains import ConversationChain 3 | from langchain.memory import ConversationBufferMemory 4 | from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler 5 | 6 | llm = Bedrock( 7 | credentials_profile_name="william", 8 | model_id="amazon.titan-text-express-v1", 9 | streaming=True, 10 | callbacks=[StreamingStdOutCallbackHandler()], 11 | ) 12 | conversation = ConversationChain( 13 | llm=llm, verbose=True, memory=ConversationBufferMemory() 14 | ) 15 | response = conversation.predict(input="How to set up a new profile for boto3?") -------------------------------------------------------------------------------- /AWS/Bedrock/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "aws-bedrock-tutorial" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["sugarforever "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.11" 10 | boto3 = "^1.34.10" 11 | langchain = "^0.0.353" 12 | 13 | 14 | [build-system] 15 | requires = ["poetry-core"] 16 | build-backend = "poetry.core.masonry.api" 17 | -------------------------------------------------------------------------------- /AWS/Bedrock/rag.py: -------------------------------------------------------------------------------- 1 | from langchain.retrievers import AmazonKnowledgeBasesRetriever 2 | from langchain.chains import RetrievalQA 3 | from langchain.llms import Bedrock 4 | 5 | retriever = AmazonKnowledgeBasesRetriever( 6 | credentials_profile_name="william", 7 | knowledge_base_id="GFQSZ3PZJV", 8 | 
retrieval_config={"vectorSearchConfiguration": {"numberOfResults": 4}}, 9 | ) 10 | 11 | question = "Introduce the training hardware of llama2" 12 | docs = retriever.get_relevant_documents(query=question) 13 | print(docs) 14 | 15 | print("\n******************************\n") 16 | 17 | llm = Bedrock( 18 | credentials_profile_name="william", 19 | model_id="amazon.titan-text-express-v1" 20 | ) 21 | 22 | qa = RetrievalQA.from_chain_type( 23 | llm=llm, retriever=retriever, return_source_documents=True 24 | ) 25 | 26 | response = qa(question) 27 | print(response) 28 | -------------------------------------------------------------------------------- /Analysis-and-Comparison-between-Optimism-and-StarkNet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sugarforever/LangChain-Tutorials/32f0abc3cebca46e583bc70fe2737ea367151e11/Analysis-and-Comparison-between-Optimism-and-StarkNet.pdf -------------------------------------------------------------------------------- /AutoGPT/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=1234567890 -------------------------------------------------------------------------------- /AutoGPT/AutoGPT.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | from langchain.llms import OpenAI 3 | from langchain.prompts import PromptTemplate 4 | from langchain.chains import LLMChain, SequentialChain 5 | import streamlit as st 6 | 7 | # Load env vars 8 | load_dotenv() 9 | 10 | st.title('AutoGPT Wizard') 11 | prompt = st.text_input('Tell me a topic you want to learn its programming language:') 12 | 13 | # Prompt templates 14 | language_template = PromptTemplate( 15 | input_variables = ['topic'], 16 | template='Suggest me a programming language for {topic} and respond in a code block with the language name only' 17 | ) 18 | 19 | book_recommendation_template 
= PromptTemplate( 20 | input_variables = ['programming_language'], 21 | template='''Recommend me a book based on this programming language {programming_language} 22 | 23 | The book name should be in a code block and the book name should be the only text in the code block 24 | ''' 25 | ) 26 | 27 | llm = OpenAI(temperature=0.9, model_name="gpt-3.5-turbo") 28 | language_chain = LLMChain(llm=llm, prompt=language_template, verbose=True, output_key='programming_language') 29 | book_recommendation_chain = LLMChain(llm=llm, prompt=book_recommendation_template, verbose=True, output_key='book_name') 30 | 31 | sequential_chain = SequentialChain( 32 | chains = [language_chain, book_recommendation_chain], 33 | input_variables=['topic'], 34 | output_variables=['programming_language', 'book_name'], 35 | verbose=True) 36 | 37 | if prompt: 38 | reply = sequential_chain({'topic': prompt}) 39 | 40 | with st.expander("Result"): 41 | st.info(reply) 42 | 43 | with st.expander("Programming Language"): 44 | st.info(reply['programming_language']) 45 | 46 | with st.expander("Recommended Book"): 47 | st.info(reply['book_name']) 48 | -------------------------------------------------------------------------------- /AutoGPT/README.md: -------------------------------------------------------------------------------- 1 | # Example AutoGPT 2 | 3 | This is an example of how to use LangChain and OpenAI to develop an AutoGPT app, aka AI Agent. 4 | 5 | ## Get started 6 | 7 | 1. python -m venv venv 8 | 2. source venv/bin/activate 9 | 3. pip install -r requirements.txt 10 | 4. streamlit run AutoGPT.py 11 | 5. 
open http://localhost:8501/ in your browser -------------------------------------------------------------------------------- /AutoGPT/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | altair==4.2.2 4 | anyio==3.6.2 5 | async-timeout==4.0.2 6 | attrs==23.1.0 7 | backoff==2.2.1 8 | beautifulsoup4==4.12.2 9 | blinker==1.6.2 10 | cachetools==5.3.0 11 | certifi==2022.12.7 12 | charset-normalizer==3.1.0 13 | chromadb==0.3.21 14 | click==8.1.3 15 | clickhouse-connect==0.5.20 16 | dataclasses-json==0.5.7 17 | decorator==5.1.1 18 | duckdb==0.7.1 19 | entrypoints==0.4 20 | fastapi==0.95.1 21 | filelock==3.12.0 22 | frozenlist==1.3.3 23 | fsspec==2023.4.0 24 | gitdb==4.0.10 25 | GitPython==3.1.31 26 | greenlet==2.0.2 27 | h11==0.14.0 28 | hnswlib==0.7.0 29 | httptools==0.5.0 30 | huggingface-hub==0.14.1 31 | idna==3.4 32 | importlib-metadata==6.6.0 33 | Jinja2==3.1.2 34 | joblib==1.2.0 35 | jsonschema==4.17.3 36 | langchain==0.0.149 37 | lz4==4.3.2 38 | markdown-it-py==2.2.0 39 | MarkupSafe==2.1.2 40 | marshmallow==3.19.0 41 | marshmallow-enum==1.5.1 42 | mdurl==0.1.2 43 | monotonic==1.6 44 | mpmath==1.3.0 45 | multidict==6.0.4 46 | mypy-extensions==1.0.0 47 | networkx==3.1 48 | nltk==3.8.1 49 | numexpr==2.8.4 50 | numpy==1.24.3 51 | openai==0.27.4 52 | openapi-schema-pydantic==1.2.4 53 | packaging==23.1 54 | pandas==1.5.3 55 | Pillow==9.5.0 56 | posthog==3.0.1 57 | protobuf==3.20.3 58 | pyarrow==11.0.0 59 | pydantic==1.10.7 60 | pydeck==0.8.1b0 61 | Pygments==2.15.1 62 | Pympler==1.0.1 63 | pyrsistent==0.19.3 64 | python-dateutil==2.8.2 65 | python-dotenv==1.0.0 66 | pytz==2023.3 67 | pytz-deprecation-shim==0.1.0.post0 68 | PyYAML==6.0 69 | regex==2023.3.23 70 | requests==2.28.2 71 | rich==13.3.4 72 | scikit-learn==1.2.2 73 | scipy==1.10.1 74 | sentence-transformers==2.2.2 75 | sentencepiece==0.1.98 76 | six==1.16.0 77 | smmap==5.0.0 78 | sniffio==1.3.0 79 | soupsieve==2.4.1 80 | 
SQLAlchemy==2.0.10 81 | starlette==0.26.1 82 | streamlit==1.21.0 83 | sympy==1.11.1 84 | tenacity==8.2.2 85 | threadpoolctl==3.1.0 86 | tiktoken==0.3.3 87 | tokenizers==0.13.3 88 | toml==0.10.2 89 | toolz==0.12.0 90 | torch==2.0.0 91 | torchvision==0.15.1 92 | tornado==6.3.1 93 | tqdm==4.65.0 94 | transformers==4.28.1 95 | typing-inspect==0.8.0 96 | typing_extensions==4.5.0 97 | tzdata==2023.3 98 | tzlocal==4.3 99 | urllib3==1.26.15 100 | uvicorn==0.21.1 101 | uvloop==0.17.0 102 | validators==0.20.0 103 | watchfiles==0.19.0 104 | websockets==11.0.2 105 | wikipedia==1.4.0 106 | yarl==1.9.2 107 | zipp==3.15.0 108 | zstandard==0.21.0 109 | -------------------------------------------------------------------------------- /How_OpenAI_Count_Tokens.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "OpenAI使用`tiktoken`来拆分文本为token。该notebook介绍OpenAI是如何计数token的。" 9 | ] 10 | }, 11 | { 12 | "attachments": {}, 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "编码方法决定了不同的文本拆分Token的方式。OpenAI使用如下3个`tiktoken`支持的编码方法于不同的模型中:\n", 17 | "\n", 18 | "1. cl100k_base: gpt-4, gpt-3.5-turbo, text-embedding-ada-002\n", 19 | "2. p50k_base: text-davinci-002, text-davinci-003\n", 20 | "3. r50k_base 或 gpt2: GPT-3模型,如davinci" 21 | ] 22 | }, 23 | { 24 | "attachments": {}, 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "1. 安装`tiktoken`" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "%pip install --upgrade tiktoken > /dev/null" 38 | ] 39 | }, 40 | { 41 | "attachments": {}, 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "2. 
编码" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 17, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "import tiktoken\n", 55 | "\n", 56 | "encoding = tiktoken.get_encoding(\"p50k_base\")\n", 57 | "encoding_for_model = tiktoken.encoding_for_model(\"gpt-4\")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 18, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "[19526, 254, 25001, 121, 171, 120, 234, 17312, 233, 20998, 233]\n", 70 | "[57668, 53901, 3922, 4916, 233, 98915]\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "text_chinese = '你好,朋友'\n", 76 | "\n", 77 | "print(encoding.encode(text_chinese))\n", 78 | "print(encoding_for_model.encode(text_chinese))" 79 | ] 80 | }, 81 | { 82 | "attachments": {}, 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "3. 解码" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 19, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "你好,朋友\n", 99 | "你好,朋友\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "print(encoding.decode([19526, 254, 25001, 121, 171, 120, 234, 17312, 233, 20998, 233]))\n", 105 | "print(encoding_for_model.decode([57668, 53901, 3922, 4916, 233, 98915]))" 106 | ] 107 | }, 108 | { 109 | "attachments": {}, 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "4. 
OpenAI的Chat API的Token计数方式,参考官方文档[链接](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 20, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "def num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0301\"):\n", 123 | " \"\"\"Returns the number of tokens used by a list of messages.\"\"\"\n", 124 | " try:\n", 125 | " encoding = tiktoken.encoding_for_model(model)\n", 126 | " except KeyError:\n", 127 | " print(\"Warning: model not found. Using cl100k_base encoding.\")\n", 128 | " encoding = tiktoken.get_encoding(\"cl100k_base\")\n", 129 | " if model == \"gpt-3.5-turbo\":\n", 130 | " print(\"Warning: gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301.\")\n", 131 | " return num_tokens_from_messages(messages, model=\"gpt-3.5-turbo-0301\")\n", 132 | " elif model == \"gpt-4\":\n", 133 | " print(\"Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314.\")\n", 134 | " return num_tokens_from_messages(messages, model=\"gpt-4-0314\")\n", 135 | " elif model == \"gpt-3.5-turbo-0301\":\n", 136 | " tokens_per_message = 4 # every message follows <|start|>{role/name}\\n{content}<|end|>\\n\n", 137 | " tokens_per_name = -1 # if there's a name, the role is omitted\n", 138 | " elif model == \"gpt-4-0314\":\n", 139 | " tokens_per_message = 3\n", 140 | " tokens_per_name = 1\n", 141 | " else:\n", 142 | " raise NotImplementedError(f\"\"\"num_tokens_from_messages() is not implemented for model {model}. 
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.\"\"\")\n", 143 | " num_tokens = 0\n", 144 | " for message in messages:\n", 145 | " num_tokens += tokens_per_message\n", 146 | " for key, value in message.items():\n", 147 | " num_tokens += len(encoding.encode(value))\n", 148 | " if key == \"name\":\n", 149 | " num_tokens += tokens_per_name\n", 150 | " num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>\n", 151 | " return num_tokens" 152 | ] 153 | }, 154 | { 155 | "attachments": {}, 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "5. 示例代码" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 22, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "gpt-3.5-turbo-0301\n", 172 | "67 prompt tokens counted by num_tokens_from_messages().\n", 173 | "67 prompt tokens counted by the OpenAI API.\n", 174 | "\n", 175 | "gpt-4-0314\n", 176 | "67 prompt tokens counted by num_tokens_from_messages().\n", 177 | "67 prompt tokens counted by the OpenAI API.\n", 178 | "\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "import openai\n", 184 | "\n", 185 | "example_messages = [\n", 186 | " {\n", 187 | " \"role\": \"system\",\n", 188 | " \"content\": \"你是翻译助理,请帮我将英文翻译成中文,谢谢。请只回复翻译文字,不要回复其他内容。\",\n", 189 | " },\n", 190 | " {\n", 191 | " \"role\": \"user\",\n", 192 | " \"name\": \"Alice\",\n", 193 | " \"content\": \"The sky is blue.\",\n", 194 | " },\n", 195 | "]\n", 196 | "\n", 197 | "for model in [\"gpt-3.5-turbo-0301\", \"gpt-4-0314\"]:\n", 198 | " print(model)\n", 199 | " # 来自上述实现的函数的token计数\n", 200 | " print(f\"{num_tokens_from_messages(example_messages, model)} prompt tokens counted by num_tokens_from_messages().\")\n", 201 | " # 来自OpenAI API的token计数\n", 202 | " response = openai.ChatCompletion.create(\n", 203 | " model=model,\n", 204 | " 
messages=example_messages,\n", 205 | " temperature=0,\n", 206 | " max_tokens=1 # 仅返回用于计数的token数量,因此不需要API返回completion内容\n", 207 | " )\n", 208 | " print(f'{response[\"usage\"][\"prompt_tokens\"]} prompt tokens counted by the OpenAI API.')\n", 209 | " print()" 210 | ] 211 | } 212 | ], 213 | "metadata": { 214 | "kernelspec": { 215 | "display_name": "Python 3", 216 | "language": "python", 217 | "name": "python3" 218 | }, 219 | "language_info": { 220 | "codemirror_mode": { 221 | "name": "ipython", 222 | "version": 3 223 | }, 224 | "file_extension": ".py", 225 | "mimetype": "text/x-python", 226 | "name": "python", 227 | "nbconvert_exporter": "python", 228 | "pygments_lexer": "ipython3", 229 | "version": "3.9.16" 230 | }, 231 | "orig_nbformat": 4 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 2 235 | } 236 | -------------------------------------------------------------------------------- /LangChain_APIChain.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyPi9Xve396uxUAuTiWRDCBu", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": { 33 | "colab": { 34 | "base_uri": "https://localhost:8080/" 35 | }, 36 | "id": "j-ARiJc5771h", 37 | "outputId": "fad717a8-ed32-43c8-8ad5-3efa2fa9a87c" 38 | }, 39 | "outputs": [ 40 | { 41 | "output_type": "stream", 42 | "name": "stdout", 43 | "text": [ 44 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m696.4/696.4 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m 
eta \u001b[36m0:00:00\u001b[0m\n", 45 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.6/71.6 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 46 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 47 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.0/90.0 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 48 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 49 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.6/149.6 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 50 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 51 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.1/49.1 kB\u001b[0m \u001b[31m509.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 52 | "\u001b[?25h" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "!pip install -qU langchain openai" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "source": [ 63 | "from langchain.llms import OpenAI\n", 64 | "from langchain.chains import LLMRequestsChain, LLMChain" 65 | ], 66 | "metadata": { 67 | "id": "8fJjgh7C8TU7" 68 | }, 69 | "execution_count": 11, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "source": [ 75 | "llm = OpenAI(temperature=0, model_name='gpt-3.5-turbo', openai_api_key='your openai api key')" 76 | ], 77 | "metadata": { 78 | "id": "iInsE--I-yRY" 79 | }, 80 | "execution_count": 12, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "source": [ 86 | "from langchain.prompts import PromptTemplate\n", 87 | 
"\n", 88 | "template = \"\"\"Between >>> and <<< are the raw search result text from provided RSS URL.\n", 89 | "Extract the answer to the question '{query}' or say \"not found\" if the information is not contained, and summarize all the information.\n", 90 | ">>> {requests_result} <<<\n", 91 | "Use the following JSON format to include all the titles:\n", 92 | "{{\n", 93 | " \"titles\": [\n", 94 | " 'aaa',\n", 95 | " 'bbb',\n", 96 | " ]\n", 97 | "}} \n", 98 | "Extracted:\"\"\"\n", 99 | "\n", 100 | "PROMPT = PromptTemplate(\n", 101 | " input_variables=[\"query\", \"requests_result\"],\n", 102 | " template=template,\n", 103 | ")" 104 | ], 105 | "metadata": { 106 | "id": "SczpN3-rA0_R" 107 | }, 108 | "execution_count": 54, 109 | "outputs": [] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "source": [ 114 | "chain = LLMRequestsChain(llm_chain = LLMChain(llm=llm, prompt=PROMPT))" 115 | ], 116 | "metadata": { 117 | "id": "ZrYLB-e8A5I0" 118 | }, 119 | "execution_count": 55, 120 | "outputs": [] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "source": [ 125 | "question = \"What are all the titles in this RSS feed?\"\n", 126 | "inputs = {\n", 127 | " \"query\": question,\n", 128 | " \"url\": \"https://rss.nytimes.com/services/xml/rss/nyt/US.xml\"\n", 129 | "}\n" 130 | ], 131 | "metadata": { 132 | "id": "Ts3Nc4otA66F" 133 | }, 134 | "execution_count": 57, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "source": [ 140 | "response = chain(inputs)\n", 141 | "print(response['output'])" 142 | ], 143 | "metadata": { 144 | "colab": { 145 | "base_uri": "https://localhost:8080/" 146 | }, 147 | "id": "tXe0deYDA9Lt", 148 | "outputId": "0a251970-243e-4e19-8939-a518984742d9" 149 | }, 150 | "execution_count": 58, 151 | "outputs": [ 152 | { 153 | "output_type": "stream", 154 | "name": "stderr", 155 | "text": [ 156 | "/usr/local/lib/python3.10/dist-packages/bs4/builder/__init__.py:545: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an 
HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n", 157 | " warnings.warn(\n" 158 | ] 159 | }, 160 | { 161 | "output_type": "stream", 162 | "name": "stdout", 163 | "text": [ 164 | "{\n", 165 | " \"titles\": [\n", 166 | " \"Minnesota Votes to Legalize Marijuana as Democrats Press Liberal Policies\",\n", 167 | " \"Montana Governor Signs Law Banning Transgender Care for Minors\",\n", 168 | " \"North Carolina Gerrymander Ruling Reflects Politicization of Judiciary Nationally\",\n", 169 | " \"Colorado Governor Signs Bills Strengthening Gun Laws\",\n", 170 | " \"Doorbell Prank Crash Trial: Anurag Chandra Is Found Guilty of Murder\",\n", 171 | " \"New California Rule Would Ban Sale of Diesel Trucks by 2036\",\n", 172 | " \"South Carolina Democrats Elect First Black Woman to Run State Party\",\n", 173 | " \"Gunman Kills Five People in Texas and Is Still at Large, Officials Say\",\n", 174 | " \"As Biden Runs for Re-election, Black Voters’ Frustration Bubbles\",\n", 175 | " \"Army Grounds Nonessential Flights After 2 Helicopter Crashes\"\n", 176 | " ]\n", 177 | "}\n", 178 | "\n", 179 | "Summary: The RSS feed contains news articles on various topics such as politics, law, crime, and environment. Some of the titles include Minnesota legalizing marijuana, Montana banning transgender care for minors, North Carolina's gerrymandering ruling, Colorado strengthening gun laws, and California banning diesel trucks by 2036. The feed also includes news on a doorbell prank crash trial, South Carolina electing its first black woman to run the state party, a shooting in Texas, and Black voters' frustration with President Biden. 
The Army has grounded nonessential flights after two helicopter crashes.\n" 180 | ] 181 | } 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "source": [ 187 | "template = \"\"\"在 >>> 和 <<< 之间是网页的返回的HTML内容。\n", 188 | "\n", 189 | "网页是新浪财经A股上市公司的每季度股东信息表格。\n", 190 | "\n", 191 | "请抽取参数请求的信息。每个截至日期作为JSON返回数据的date_of_quarter。因此,当表格中有多个截止日期时,返回数据应当包括所有的日期作为key。\n", 192 | "\n", 193 | ">>> {requests_result} <<<\n", 194 | "请使用如下的JSON格式返回数据\n", 195 | "{{\n", 196 | " \"date_of_quarter\": [\n", 197 | " {{\n", 198 | " \"holder_name\": \"a\",\n", 199 | " \"percentage\": \"50\"\n", 200 | " }},\n", 201 | " {{\n", 202 | " \"holder_name\": \"b\",\n", 203 | " \"percentage\": \"30\"\n", 204 | " }},\n", 205 | " ]\n", 206 | "}} \n", 207 | "\n", 208 | "例如,截至日期为2023-03-31,JSON数据应该是如下形式:\n", 209 | "\n", 210 | "{{\n", 211 | " \"2023-03-31\": [\n", 212 | " {{\n", 213 | " \"holder_name\": \"a\",\n", 214 | " \"percentage\": \"50\"\n", 215 | " }},\n", 216 | " {{\n", 217 | " \"holder_name\": \"b\",\n", 218 | " \"percentage\": \"30\"\n", 219 | " }},\n", 220 | " ]\n", 221 | "}}\n", 222 | "Extracted:\"\"\"\n", 223 | "\n", 224 | "PROMPT = PromptTemplate(\n", 225 | " input_variables=[\"requests_result\"],\n", 226 | " template=template,\n", 227 | ")" 228 | ], 229 | "metadata": { 230 | "id": "e9LFhyR0Ea9A" 231 | }, 232 | "execution_count": 67, 233 | "outputs": [] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "source": [ 238 | "chain = LLMRequestsChain(llm_chain = LLMChain(llm=llm, prompt=PROMPT))" 239 | ], 240 | "metadata": { 241 | "id": "B8d9vOQXEdd7" 242 | }, 243 | "execution_count": 68, 244 | "outputs": [] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "source": [ 249 | "inputs = {\n", 250 | " \"url\": \"https://vip.stock.finance.sina.com.cn/corp/go.php/vCI_StockHolder/stockid/600519/displaytype/30.phtml\"\n", 251 | "}" 252 | ], 253 | "metadata": { 254 | "id": "u2kZVhqfEfCt" 255 | }, 256 | "execution_count": 69, 257 | "outputs": [] 258 | }, 259 | { 260 | "cell_type": "code", 261 | 
"source": [ 262 | "response = chain(inputs)\n", 263 | "print(response['output'])" 264 | ], 265 | "metadata": { 266 | "colab": { 267 | "base_uri": "https://localhost:8080/" 268 | }, 269 | "id": "Y5_iZan9Eg8m", 270 | "outputId": "fb5fd134-7b8d-4d93-9b48-4affa785b261" 271 | }, 272 | "execution_count": 70, 273 | "outputs": [ 274 | { 275 | "output_type": "stream", 276 | "name": "stdout", 277 | "text": [ 278 | "{\n", 279 | " \"2023-03-31\": [\n", 280 | " {\n", 281 | " \"holder_name\": \"中国贵州茅台酒厂(集团)有限责任公司\",\n", 282 | " \"percentage\": \"54.06\"\n", 283 | " },\n", 284 | " {\n", 285 | " \"holder_name\": \"香港中央结算有限公司\",\n", 286 | " \"percentage\": \"7.26\"\n", 287 | " },\n", 288 | " {\n", 289 | " \"holder_name\": \"贵州省国有资本运营有限责任公司\",\n", 290 | " \"percentage\": \"4.54\"\n", 291 | " },\n", 292 | " {\n", 293 | " \"holder_name\": \"贵州茅台酒厂(集团)技术开发有限公司\",\n", 294 | " \"percentage\": \"2.22\"\n", 295 | " },\n", 296 | " {\n", 297 | " \"holder_name\": \"中央汇金资产管理有限责任公司\",\n", 298 | " \"percentage\": \"0.83\"\n", 299 | " },\n", 300 | " {\n", 301 | " \"holder_name\": \"深圳市金汇荣盛财富管理有限公司-金汇荣盛三号私募证券投资基金\",\n", 302 | " \"percentage\": \"0.66\"\n", 303 | " },\n", 304 | " {\n", 305 | " \"holder_name\": \"珠海市瑞丰汇邦资产管理有限公司-瑞丰汇邦三号私募证券投资基金\",\n", 306 | " \"percentage\": \"0.65\"\n", 307 | " },\n", 308 | " {\n", 309 | " \"holder_name\": \"中国证券金融股份有限公司\",\n", 310 | " \"percentage\": \"0.64\"\n", 311 | " },\n", 312 | " {\n", 313 | " \"holder_name\": \"中国工商银行-上证50交易型开放式指数证券投资基金\",\n", 314 | " \"percentage\": \"0.39\"\n", 315 | " },\n", 316 | " {\n", 317 | " \"holder_name\": \"中国人寿保险股份有限公司-传统-普通保险产品-005L-CT001沪\",\n", 318 | " \"percentage\": \"0.32\"\n", 319 | " }\n", 320 | " ],\n", 321 | " \"2022-12-31\": [\n", 322 | " {\n", 323 | " \"holder_name\": \"中国贵州茅台酒厂(集团)有限责任公司\",\n", 324 | " \"percentage\": \"54\"\n", 325 | " },\n", 326 | " {\n", 327 | " \"holder_name\": \"香港中央结算有限公司\",\n", 328 | " \"percentage\": \"6.75\"\n", 329 | " },\n", 330 | " {\n", 331 | " \"holder_name\": \"贵州省国有资本运营有限责任公司\",\n", 
332 | " \"percentage\": \"4.54\"\n", 333 | " },\n", 334 | " {\n", 335 | " \"holder_name\": \"贵州茅台酒厂(集团)技术开发有限公司\",\n", 336 | " \"percentage\": \"2.21\"\n", 337 | " },\n", 338 | " {\n", 339 | " \"holder_name\": \"中央汇金资产管理有限责任公司\",\n", 340 | " \"percentage\": \"0.83\"\n", 341 | " },\n", 342 | " {\n", 343 | " \"holder_name\": \"中国证券金融股份有限公司\",\n", 344 | " \"percentage\": \"0.64\"\n", 345 | " },\n", 346 | " {\n", 347 | " \"holder_name\": \"深圳市金汇荣盛财富管理有限公司-金汇荣盛三号私募证券投资基金\",\n", 348 | " \"percentage\": \"0.61\"\n", 349 | " },\n", 350 | " {\n", 351 | " \"holder_name\": \"珠海市瑞丰汇邦资产管理有限公司-瑞丰汇邦三号私募证券投资基金\",\n", 352 | " \"percentage\": \"0.6\"\n", 353 | " },\n", 354 | " {\n", 355 | " \"holder_name\": \"中国银行股份有限公司-招商中证白酒指数分级证券投资基金\",\n", 356 | " \"percentage\": \"0.43\"\n", 357 | " },\n", 358 | " {\n", 359 | " \"holder_name\": \"中国工商银行-上证50交易型开放式指数证券投资基金\",\n", 360 | " \"percentage\": \"0.43\"\n", 361 | " }\n", 362 | " ],\n", 363 | " \"2022-09-30\": [\n", 364 | " {\n", 365 | " \"holder_name\": \"中国贵州茅台酒厂(集团)有限责任公司\",\n", 366 | " \"percentage\": \"54\"\n", 367 | " },\n", 368 | " {\n", 369 | " \"holder_name\": \"香港中央结算有限公司\",\n", 370 | " \"percentage\": \"7.31\"\n", 371 | " },\n", 372 | " {\n", 373 | " \"holder_name\": \"贵州省国有资本运营有限责任\n" 374 | ] 375 | } 376 | ] 377 | } 378 | ] 379 | } -------------------------------------------------------------------------------- /LangChain_ChatOpenAI_OpenAI_Diff.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyN4zbA6UTpBoKhwBY7dto14", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | 
"\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "source": [ 32 | "# Difference between ChatOpenAI and OpenAI" 33 | ], 34 | "metadata": { 35 | "id": "KMYMb8dRTPds" 36 | } 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "source": [ 41 | "In this notebook, I will explain to you the difference between the 2 classes introduced by the LangChain framework." 42 | ], 43 | "metadata": { 44 | "id": "nMobxSKSTi8K" 45 | } 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 19, 50 | "metadata": { 51 | "id": "0l3RTnY-vnsx" 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "!pip install langchain openai --quiet --upgrade" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "source": [ 61 | "from langchain import OpenAI\n", 62 | "from langchain.chat_models import ChatOpenAI\n", 63 | "import os" 64 | ], 65 | "metadata": { 66 | "id": "R5j6nQZdvwTZ" 67 | }, 68 | "execution_count": 20, 69 | "outputs": [] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "source": [ 74 | "os.environ['OPENAI_API_KEY'] = 'your openai api key'" 75 | ], 76 | "metadata": { 77 | "id": "gFoCDWd6xYHJ" 78 | }, 79 | "execution_count": 21, 80 | "outputs": [] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "source": [ 85 | "ChatOpenAI is using OpenAI endpoint `/v1/chat/completions/` which supports the models:\n", 86 | "- gpt-4\n", 87 | "- gpt-4-0613\n", 88 | "- gpt-4-32k\n", 89 | "- gpt-4-32k-0613\n", 90 | "- gpt-3.5-turbo\n", 91 | "- gpt-3.5-turbo-0613\n", 92 | "- gpt-3.5-turbo-16k\n", 93 | "- gpt-3.5-turbo-16k-0613" 94 | ], 95 | "metadata": { 96 | "id": "Xki-BzX6Txcy" 97 | } 98 | }, 99 | { 100 | "cell_type": "code", 101 | "source": [ 102 | "llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")" 103 | ], 104 | "metadata": { 105 | "id": "2B4PVfNjRE1y" 106 | }, 107 | "execution_count": 32, 108 | "outputs": [] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "source": [ 113 | "from langchain.schema import (\n", 114 | " SystemMessage,\n", 115 | " HumanMessage,\n", 116 | " 
AIMessage\n", 117 | ")\n", 118 | "\n", 119 | "messages = [\n", 120 | " HumanMessage(content=\"Hi AI, how are you today?\"),\n", 121 | "]" 122 | ], 123 | "metadata": { 124 | "id": "gfY1IydMRGnp" 125 | }, 126 | "execution_count": 30, 127 | "outputs": [] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "source": [ 132 | "llm(messages)" 133 | ], 134 | "metadata": { 135 | "colab": { 136 | "base_uri": "https://localhost:8080/" 137 | }, 138 | "id": "_-zvodYHSQ2F", 139 | "outputId": "8f365b64-3811-4687-9230-4c8bd9630faa" 140 | }, 141 | "execution_count": 33, 142 | "outputs": [ 143 | { 144 | "output_type": "execute_result", 145 | "data": { 146 | "text/plain": [ 147 | "AIMessage(content=\"Hello! As an AI, I don't have feelings, but I'm here and ready to assist you. How can I help you today?\", additional_kwargs={}, example=False)" 148 | ] 149 | }, 150 | "metadata": {}, 151 | "execution_count": 33 152 | } 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "source": [ 158 | "OpenAI is using OpenAI endpoint `/v1/completions/` which supports the models:\n", 159 | " - text-davinci-003\n", 160 | " - text-davinci-002\n", 161 | " - text-curie-001\n", 162 | " - text-babbage-001\n", 163 | " - text-ada-001" 164 | ], 165 | "metadata": { 166 | "id": "PYbcPJdGUD16" 167 | } 168 | }, 169 | { 170 | "cell_type": "code", 171 | "source": [ 172 | "llm = OpenAI(temperature=0, model_name='text-davinci-002')" 173 | ], 174 | "metadata": { 175 | "id": "0mnDmF8RRse0" 176 | }, 177 | "execution_count": 34, 178 | "outputs": [] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "source": [ 183 | "llm('Hi AI, how are you today?')" 184 | ], 185 | "metadata": { 186 | "colab": { 187 | "base_uri": "https://localhost:8080/", 188 | "height": 36 189 | }, 190 | "id": "7YVluKNLRvyy", 191 | "outputId": "49c68d16-3da5-4820-a381-d28928e1ad6c" 192 | }, 193 | "execution_count": 35, 194 | "outputs": [ 195 | { 196 | "output_type": "execute_result", 197 | "data": { 198 | "text/plain": [ 199 | "\"\\n\\nI'm doing 
well today. Thank you for asking.\"" 200 | ], 201 | "application/vnd.google.colaboratory.intrinsic+json": { 202 | "type": "string" 203 | } 204 | }, 205 | "metadata": {}, 206 | "execution_count": 35 207 | } 208 | ] 209 | } 210 | ] 211 | } -------------------------------------------------------------------------------- /LangChain_Gmail.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Introduction\n", 9 | "\n", 10 | "This is a Python notebook demonstrating how to integrate Gmail access with OpenAI capabilities to create an AI agent that can read and write emails.\n", 11 | "\n", 12 | "You should complete the following steps to get started:\n", 13 | "- Enable the API by following the [Enable the API - Quick start](https://developers.google.com/gmail/api/quickstart/python#enable_the_api) guide.\n", 14 | "- Set up your credentials as explained in the [Gmail API docs](https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application)." 15 | ] 16 | }, 17 | { 18 | "attachments": {}, 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "1. Install required Python packages" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "!pip install --upgrade --quiet google-api-python-client google-auth-oauthlib google-auth-httplib2 langchain openai" 32 | ] 33 | }, 34 | { 35 | "attachments": {}, 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "2.
Install python-dotenv and use `load_dotenv` to load *OPENAI_API_KEY* from the `.env` file" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "!pip install python-dotenv\n", 49 | "from dotenv import load_dotenv\n", 50 | "load_dotenv()" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "from langchain.agents.agent_toolkits import GmailToolkit\n", 60 | "\n", 61 | "toolkit = GmailToolkit()" 62 | ] 63 | }, 64 | { 65 | "attachments": {}, 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "3. List the Gmail tools supported by LangChain" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "tools = toolkit.get_tools()\n", 79 | "tools" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "from langchain import OpenAI\n", 89 | "from langchain.agents import initialize_agent, AgentType" 90 | ] 91 | }, 92 | { 93 | "attachments": {}, 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "4. Create a LangChain agent with OpenAI and any model you prefer" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "llm = OpenAI(temperature=0)\n", 107 | "agent = initialize_agent(tools=tools, llm=llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)" 108 | ] 109 | }, 110 | { 111 | "attachments": {}, 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "5.
Ask the agent to work on your Gmail tasks" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "agent.run(\n", 125 | " \"Create a gmail draft for me to edit of a letter for a potential customer\"\n", 126 | " \" who has visited to my website and expressed interest in my product.\"\n", 127 | " \" Under no circumstances may you send the message, however.\"\n", 128 | ")" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "agent.run(\"Could you search in my drafts for the latest email?\")" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "agent.run(\"Please send an email to 0001coder@gmail.com with title 'Greetings from myself' and message 'This is 01coder, and nice to meet you'.\")" 147 | ] 148 | } 149 | ], 150 | "metadata": { 151 | "kernelspec": { 152 | "display_name": "Python 3", 153 | "language": "python", 154 | "name": "python3" 155 | }, 156 | "language_info": { 157 | "codemirror_mode": { 158 | "name": "ipython", 159 | "version": 3 160 | }, 161 | "file_extension": ".py", 162 | "mimetype": "text/x-python", 163 | "name": "python", 164 | "nbconvert_exporter": "python", 165 | "pygments_lexer": "ipython3", 166 | "version": "3.9.16" 167 | }, 168 | "orig_nbformat": 4 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 2 172 | } 173 | -------------------------------------------------------------------------------- /LangChain_LLM_Math.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyM3TaApvacHQONh9AbS8ZKW", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": 
"Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | }, 17 | "accelerator": "GPU", 18 | "gpuClass": "standard" 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "source": [ 34 | "!pip install langchain openai" 35 | ], 36 | "metadata": { 37 | "colab": { 38 | "base_uri": "https://localhost:8080/" 39 | }, 40 | "id": "f9iqJ8-RM_2Q", 41 | "outputId": "f4141238-2a64-4c24-a28d-bb6c8b17db19" 42 | }, 43 | "execution_count": 1, 44 | "outputs": [ 45 | { 46 | "output_type": "stream", 47 | "name": "stdout", 48 | "text": [ 49 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 50 | "Collecting langchain\n", 51 | " Downloading langchain-0.0.151-py3-none-any.whl (665 kB)\n", 52 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m666.0/666.0 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 53 | "\u001b[?25hCollecting openai\n", 54 | " Downloading openai-0.27.5-py3-none-any.whl (71 kB)\n", 55 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.6/71.6 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 56 | "\u001b[?25hRequirement already satisfied: pydantic<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.10.7)\n", 57 | "Requirement already satisfied: PyYAML>=5.4.1 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0)\n", 58 | "Collecting dataclasses-json<0.6.0,>=0.5.7\n", 59 | " Downloading dataclasses_json-0.5.7-py3-none-any.whl (25 kB)\n", 60 | "Requirement already satisfied: SQLAlchemy<3,>1.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.10)\n", 61 | "Collecting async-timeout<5.0.0,>=4.0.0\n", 62 | " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", 63 | 
"Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.2.2)\n", 64 | "Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.22.4)\n", 65 | "Collecting openapi-schema-pydantic<2.0,>=1.2\n", 66 | " Downloading openapi_schema_pydantic-1.2.4-py3-none-any.whl (90 kB)\n", 67 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.0/90.0 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 68 | "\u001b[?25hRequirement already satisfied: tqdm>=4.48.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.65.0)\n", 69 | "Collecting aiohttp<4.0.0,>=3.8.3\n", 70 | " Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", 71 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 72 | "\u001b[?25hRequirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.27.1)\n", 73 | "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.8.4)\n", 74 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.1.0)\n", 75 | "Collecting multidict<7.0,>=4.5\n", 76 | " Downloading multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n", 77 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 78 | "\u001b[?25hCollecting frozenlist>=1.1.1\n", 79 | " Downloading frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (149 kB)\n", 80 | "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.6/149.6 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 81 | "\u001b[?25hCollecting aiosignal>=1.1.2\n", 82 | " Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", 83 | "Collecting yarl<2.0,>=1.0\n", 84 | " Downloading yarl-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (268 kB)\n", 85 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m31.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 86 | "\u001b[?25hRequirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.0.12)\n", 87 | "Collecting marshmallow-enum<2.0.0,>=1.5.1\n", 88 | " Downloading marshmallow_enum-1.5.1-py2.py3-none-any.whl (4.2 kB)\n", 89 | "Collecting marshmallow<4.0.0,>=3.3.0\n", 90 | " Downloading marshmallow-3.19.0-py3-none-any.whl (49 kB)\n", 91 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.1/49.1 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 92 | "\u001b[?25hCollecting typing-inspect>=0.4.0\n", 93 | " Downloading typing_inspect-0.8.0-py3-none-any.whl (8.7 kB)\n", 94 | "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<2,>=1->langchain) (4.5.0)\n", 95 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (1.26.15)\n", 96 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.4)\n", 97 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2022.12.7)\n", 98 | "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from 
SQLAlchemy<3,>1.3->langchain) (2.0.2)\n", 99 | "Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.10/dist-packages (from marshmallow<4.0.0,>=3.3.0->dataclasses-json<0.6.0,>=0.5.7->langchain) (23.1)\n", 100 | "Collecting mypy-extensions>=0.3.0\n", 101 | " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", 102 | "Installing collected packages: mypy-extensions, multidict, marshmallow, frozenlist, async-timeout, yarl, typing-inspect, openapi-schema-pydantic, marshmallow-enum, aiosignal, dataclasses-json, aiohttp, openai, langchain\n", 103 | "Successfully installed aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 dataclasses-json-0.5.7 frozenlist-1.3.3 langchain-0.0.151 marshmallow-3.19.0 marshmallow-enum-1.5.1 multidict-6.0.4 mypy-extensions-1.0.0 openai-0.27.5 openapi-schema-pydantic-1.2.4 typing-inspect-0.8.0 yarl-1.9.2\n" 104 | ] 105 | } 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "source": [ 111 | "from langchain.agents import load_tools\n", 112 | "from langchain.agents import initialize_agent\n", 113 | "from langchain.agents import AgentType\n", 114 | "from langchain.llms import OpenAI\n", 115 | "from langchain.chat_models import ChatOpenAI\n", 116 | "from langchain.chains.conversation.memory import ConversationBufferWindowMemory" 117 | ], 118 | "metadata": { 119 | "id": "vWAHCTE7oa8F" 120 | }, 121 | "execution_count": 23, 122 | "outputs": [] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "source": [ 127 | "OPENAI_API_KEY = 'your OpenAI API key here'" 128 | ], 129 | "metadata": { 130 | "id": "Ojv0fTD_oqxj" 131 | }, 132 | "execution_count": 3, 133 | "outputs": [] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "source": [ 138 | "llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0, model_name=\"gpt-3.5-turbo\")" 139 | ], 140 | "metadata": { 141 | "id": "VVwq0-yHoczv" 142 | }, 143 | "execution_count": 24, 144 | "outputs": [] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "source": [ 149 | "from 
langchain.tools import BaseTool\n", 150 | "\n", 151 | "class EvaluateMathExpression(BaseTool):\n", 152 | " name = \"Math Evaluation\"\n", 153 | " description = 'use this tool to evaluate a math expression.'\n", 154 | "\n", 155 | " def _run(self, expr: str):\n", 156 | " return eval(expr)\n", 157 | " \n", 158 | " def _arun(self, query: str):\n", 159 | " raise NotImplementedError(\"Async operation not supported yet\")\n", 160 | "\n", 161 | "tools = [EvaluateMathExpression()]" 162 | ], 163 | "metadata": { 164 | "id": "qFPyDKY0ufLs" 165 | }, 166 | "execution_count": 25, 167 | "outputs": [] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "source": [ 172 | "agent = initialize_agent(\n", 173 | " agent='chat-conversational-react-description',\n", 174 | " tools=tools,\n", 175 | " llm=llm,\n", 176 | " verbose=True,\n", 177 | " max_iterations=3,\n", 178 | " early_stopping_method='generate',\n", 179 | " memory=ConversationBufferWindowMemory(\n", 180 | " memory_key='chat_history',\n", 181 | " k=5,\n", 182 | " return_messages=True\n", 183 | " )\n", 184 | ")" 185 | ], 186 | "metadata": { 187 | "id": "fkUISONKCMGQ" 188 | }, 189 | "execution_count": 27, 190 | "outputs": [] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "source": [ 195 | "for message in agent.agent.llm_chain.prompt.messages:\n", 196 | " print(message)" 197 | ], 198 | "metadata": { 199 | "colab": { 200 | "base_uri": "https://localhost:8080/" 201 | }, 202 | "id": "tQXiJpQ1h5xc", 203 | "outputId": "fb413b11-6867-4555-c967-e09189ea04aa" 204 | }, 205 | "execution_count": 28, 206 | "outputs": [ 207 | { 208 | "output_type": "stream", 209 | "name": "stdout", 210 | "text": [ 211 | "prompt=PromptTemplate(input_variables=[], output_parser=None, partial_variables={}, template='Assistant is a large language model trained by OpenAI.\\n\\nAssistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. 
As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\\n\\nAssistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\\n\\nOverall, Assistant is a powerful system that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.', template_format='f-string', validate_template=True) additional_kwargs={}\n", 212 | "variable_name='chat_history'\n", 213 | "prompt=PromptTemplate(input_variables=['input'], output_parser=None, partial_variables={}, template='TOOLS\\n------\\nAssistant can ask the user to use tools to look up information that may be helpful in answering the users original question. The tools the human can use are:\\n\\n> Math Evaluation: use this tool to evaluate a math expression.\\n\\nRESPONSE FORMAT INSTRUCTIONS\\n----------------------------\\n\\nWhen responding to me, please output a response in one of two formats:\\n\\n**Option 1:**\\nUse this if you want the human to use a tool.\\nMarkdown code snippet formatted in the following schema:\\n\\n```json\\n{{\\n \"action\": string \\\\ The action to take. Must be one of Math Evaluation\\n \"action_input\": string \\\\ The input to the action\\n}}\\n```\\n\\n**Option #2:**\\nUse this if you want to respond directly to the human. 
Markdown code snippet formatted in the following schema:\\n\\n```json\\n{{\\n \"action\": \"Final Answer\",\\n \"action_input\": string \\\\ You should put what you want to return to use here\\n}}\\n```\\n\\nUSER\\'S INPUT\\n--------------------\\nHere is the user\\'s input (remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else):\\n\\n{input}', template_format='f-string', validate_template=True) additional_kwargs={}\n", 214 | "variable_name='agent_scratchpad'\n" 215 | ] 216 | } 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "source": [ 222 | "agent(f\"What is 2 * 2 * 0.13 - 1.001?\")" 223 | ], 224 | "metadata": { 225 | "colab": { 226 | "base_uri": "https://localhost:8080/" 227 | }, 228 | "id": "wJPk8RebvGC3", 229 | "outputId": "0f7f012a-eb64-44d3-8314-c77f9d3318e9" 230 | }, 231 | "execution_count": 29, 232 | "outputs": [ 233 | { 234 | "output_type": "stream", 235 | "name": "stdout", 236 | "text": [ 237 | "\n", 238 | "\n", 239 | "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", 240 | "\u001b[32;1m\u001b[1;3m{\n", 241 | " \"action\": \"Final Answer\",\n", 242 | " \"action_input\": \"-0.341\"\n", 243 | "}\u001b[0m\n", 244 | "\n", 245 | "\u001b[1m> Finished chain.\u001b[0m\n" 246 | ] 247 | }, 248 | { 249 | "output_type": "execute_result", 250 | "data": { 251 | "text/plain": [ 252 | "{'input': 'What is 2 * 2 * 0.13 - 1.001?',\n", 253 | " 'chat_history': [],\n", 254 | " 'output': '-0.341'}" 255 | ] 256 | }, 257 | "metadata": {}, 258 | "execution_count": 29 259 | } 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "source": [ 265 | "2 * 2 * 0.13 - 1.001" 266 | ], 267 | "metadata": { 268 | "colab": { 269 | "base_uri": "https://localhost:8080/" 270 | }, 271 | "id": "KvC-1ZB9hURY", 272 | "outputId": "9797b9d0-3223-4597-d749-e93a752c897a" 273 | }, 274 | "execution_count": 30, 275 | "outputs": [ 276 | { 277 | "output_type": "execute_result", 278 | "data": { 279 | "text/plain": [ 280 | 
"-0.48099999999999987" 281 | ] 282 | }, 283 | "metadata": {}, 284 | "execution_count": 30 285 | } 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "source": [ 291 | "from langchain.agents.conversational_chat.prompt import (PREFIX)\n", 292 | "system_message = PREFIX + \"\\n\" + '''\n", 293 | "Unfortunately, Assistant is terrible at maths. Assistant should always refers to available tools and never try to answer math questions by itself\n", 294 | "'''" 295 | ], 296 | "metadata": { 297 | "id": "clEarofMhhJu" 298 | }, 299 | "execution_count": 31, 300 | "outputs": [] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "source": [ 305 | "new_prompt = agent.agent.create_prompt(\n", 306 | " system_message=system_message,\n", 307 | " tools=tools\n", 308 | ")\n", 309 | "\n", 310 | "agent.agent.llm_chain.prompt = new_prompt" 311 | ], 312 | "metadata": { 313 | "id": "ZARVE-emhnIN" 314 | }, 315 | "execution_count": 32, 316 | "outputs": [] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "source": [ 321 | "agent(f\"What is 2 * 2 * 0.13 - 1.001?\")" 322 | ], 323 | "metadata": { 324 | "colab": { 325 | "base_uri": "https://localhost:8080/" 326 | }, 327 | "id": "nm1_Im67hroZ", 328 | "outputId": "cd7abc79-8be3-470d-d5a2-70f1c8b2623e" 329 | }, 330 | "execution_count": 33, 331 | "outputs": [ 332 | { 333 | "output_type": "stream", 334 | "name": "stdout", 335 | "text": [ 336 | "\n", 337 | "\n", 338 | "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", 339 | "\u001b[32;1m\u001b[1;3m{\n", 340 | " \"action\": \"Math Evaluation\",\n", 341 | " \"action_input\": \"2 * 2 * 0.13 - 1.001\"\n", 342 | "}\u001b[0m\n", 343 | "Observation: \u001b[36;1m\u001b[1;3m-0.48099999999999987\u001b[0m\n", 344 | "Thought:\u001b[32;1m\u001b[1;3m{\n", 345 | " \"action\": \"Final Answer\",\n", 346 | " \"action_input\": \"-0.481\"\n", 347 | "}\u001b[0m\n", 348 | "\n", 349 | "\u001b[1m> Finished chain.\u001b[0m\n" 350 | ] 351 | }, 352 | { 353 | "output_type": "execute_result", 354 | "data": { 355 | 
"text/plain": [ 356 | "{'input': 'What is 2 * 2 * 0.13 - 1.001?',\n", 357 | " 'chat_history': [HumanMessage(content='What is 2 * 2 * 0.13 - 1.001?', additional_kwargs={}),\n", 358 | " AIMessage(content='-0.341', additional_kwargs={})],\n", 359 | " 'output': '-0.481'}" 360 | ] 361 | }, 362 | "metadata": {}, 363 | "execution_count": 33 364 | } 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "source": [ 370 | "agent.memory.buffer" 371 | ], 372 | "metadata": { 373 | "colab": { 374 | "base_uri": "https://localhost:8080/" 375 | }, 376 | "id": "6g0ri68KEC0q", 377 | "outputId": "6d348ff3-2e6e-4f7e-bd0c-8d637459c1e9" 378 | }, 379 | "execution_count": 34, 380 | "outputs": [ 381 | { 382 | "output_type": "execute_result", 383 | "data": { 384 | "text/plain": [ 385 | "[HumanMessage(content='What is 2 * 2 * 0.13 - 1.001?', additional_kwargs={}),\n", 386 | " AIMessage(content='-0.341', additional_kwargs={}),\n", 387 | " HumanMessage(content='What is 2 * 2 * 0.13 - 1.001?', additional_kwargs={}),\n", 388 | " AIMessage(content='-0.481', additional_kwargs={})]" 389 | ] 390 | }, 391 | "metadata": {}, 392 | "execution_count": 34 393 | } 394 | ] 395 | } 396 | ] 397 | } -------------------------------------------------------------------------------- /LangChain_Output_Parsing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyONPJCAwPx0Wq0HDbOoi4Tk", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "source": [ 32 | "In this notebook, we will learn how to use 
LangChain's output parser to process LLM output in a more programming-language-friendly way." 33 | ], 34 | "metadata": { 35 | "id": "UV1rzv4pfn8o" 36 | } 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 38, 41 | "metadata": { 42 | "id": "3scUGKX6fh94" 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "!pip install langchain openai -qU" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "source": [ 52 | "# CommaSeparatedListOutputParser" 53 | ], 54 | "metadata": { 55 | "id": "8wqIvBtegME0" 56 | } 57 | }, 58 | { 59 | "cell_type": "code", 60 | "source": [ 61 | "from langchain.output_parsers import CommaSeparatedListOutputParser\n", 62 | "from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate\n", 63 | "from langchain.llms import OpenAI\n", 64 | "from langchain.chat_models import ChatOpenAI" 65 | ], 66 | "metadata": { 67 | "id": "hLxERwUAgI02" 68 | }, 69 | "execution_count": 39, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "source": [ 75 | "OPENAI_API_KEY = 'your openai api key here'" 76 | ], 77 | "metadata": { 78 | "id": "xaXmIOlUh7du" 79 | }, 80 | "execution_count": 37, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "source": [ 86 | "output_parser = CommaSeparatedListOutputParser()\n", 87 | "format_instructions = output_parser.get_format_instructions()\n", 88 | "prompt = PromptTemplate(\n", 89 | " template=\"List 3 main-stream {subject}.\\n{format_instructions}\",\n", 90 | " input_variables=[\"subject\"],\n", 91 | " partial_variables={\"format_instructions\": format_instructions}\n", 92 | ")" 93 | ], 94 | "metadata": { 95 | "id": "dA7lnAUkg0O0" 96 | }, 97 | "execution_count": 46, 98 | "outputs": [] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "source": [ 103 | "print(format_instructions)" 104 | ], 105 | "metadata": { 106 | "colab": { 107 | "base_uri": "https://localhost:8080/" 108 | }, 109 | "id": "kISdCOOus7dK", 110 | "outputId":
"811f281a-f567-42de-8bb0-22b63409d206" 111 | }, 112 | "execution_count": 47, 113 | "outputs": [ 114 | { 115 | "output_type": "stream", 116 | "name": "stdout", 117 | "text": [ 118 | "Your response should be a list of comma separated values, eg: `foo, bar, baz`\n" 119 | ] 120 | } 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "source": [ 126 | "llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)" 127 | ], 128 | "metadata": { 129 | "id": "qiIwAWalgZKJ" 130 | }, 131 | "execution_count": 48, 132 | "outputs": [] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "source": [ 137 | "_input = prompt.format(subject=\"music styles\")\n", 138 | "output = llm(_input)\n" 139 | ], 140 | "metadata": { 141 | "id": "dYZdV_wHgvcd" 142 | }, 143 | "execution_count": 52, 144 | "outputs": [] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "source": [ 149 | "print(output)" 150 | ], 151 | "metadata": { 152 | "colab": { 153 | "base_uri": "https://localhost:8080/" 154 | }, 155 | "id": "9MHBQyaihwdI", 156 | "outputId": "93e41b4d-23aa-40d1-ba5a-770522047737" 157 | }, 158 | "execution_count": 53, 159 | "outputs": [ 160 | { 161 | "output_type": "stream", 162 | "name": "stdout", 163 | "text": [ 164 | "\n", 165 | "\n", 166 | "Pop, Rock, Hip-Hop\n" 167 | ] 168 | } 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "source": [ 174 | "output_parser.parse(output)" 175 | ], 176 | "metadata": { 177 | "colab": { 178 | "base_uri": "https://localhost:8080/" 179 | }, 180 | "id": "ffPXRbX-h2Lt", 181 | "outputId": "f6bf11f2-66b8-4ff1-f724-d44bfc08a1db" 182 | }, 183 | "execution_count": 54, 184 | "outputs": [ 185 | { 186 | "output_type": "execute_result", 187 | "data": { 188 | "text/plain": [ 189 | "['Pop', 'Rock', 'Hip-Hop']" 190 | ] 191 | }, 192 | "metadata": {}, 193 | "execution_count": 54 194 | } 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "source": [ 200 | "# EnumOutputParser" 201 | ], 202 | "metadata": { 203 | "id": "t9M4boioiSAa" 204 | } 205 | }, 206 | { 207 | 
"cell_type": "code", 208 | "source": [ 209 | "from langchain.output_parsers.enum import EnumOutputParser" 210 | ], 211 | "metadata": { 212 | "id": "vRqHTEYliXUb" 213 | }, 214 | "execution_count": 55, 215 | "outputs": [] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "source": [ 220 | "from enum import Enum\n", 221 | "\n", 222 | "class Genders(Enum):\n", 223 | " MALE = \"male\"\n", 224 | " FEMALE = \"female\"" 225 | ], 226 | "metadata": { 227 | "id": "W08I3jHDicmR" 228 | }, 229 | "execution_count": 56, 230 | "outputs": [] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "source": [ 235 | "output_parser = EnumOutputParser(enum=Genders)\n", 236 | "format_instructions = output_parser.get_format_instructions()\n", 237 | "prompt = PromptTemplate(\n", 238 | " template=\"Tell me the gender of the celebrity {name}.\\n{format_instructions}\",\n", 239 | " input_variables=[\"name\"],\n", 240 | " partial_variables={\"format_instructions\": format_instructions}\n", 241 | ")" 242 | ], 243 | "metadata": { 244 | "id": "XelNJSDJilxD" 245 | }, 246 | "execution_count": 57, 247 | "outputs": [] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "source": [ 252 | "print(format_instructions)" 253 | ], 254 | "metadata": { 255 | "colab": { 256 | "base_uri": "https://localhost:8080/" 257 | }, 258 | "id": "arF_9ckdw9TK", 259 | "outputId": "47e54ba9-2e34-4f7f-fc60-89e870ce2c92" 260 | }, 261 | "execution_count": 58, 262 | "outputs": [ 263 | { 264 | "output_type": "stream", 265 | "name": "stdout", 266 | "text": [ 267 | "Select one of the following options: male, female\n" 268 | ] 269 | } 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "source": [ 275 | "_input = prompt.format(name=\"Michael Jordan\")\n", 276 | "output = llm(_input)\n", 277 | "print(output)" 278 | ], 279 | "metadata": { 280 | "colab": { 281 | "base_uri": "https://localhost:8080/" 282 | }, 283 | "id": "o-jdUoa3iyda", 284 | "outputId": "04aeb640-93f9-4d7f-ac99-6c0d7e75c520" 285 | }, 286 | "execution_count": 59, 287 | 
"outputs": [ 288 | { 289 | "output_type": "stream", 290 | "name": "stdout", 291 | "text": [ 292 | "\n", 293 | "male\n" 294 | ] 295 | } 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "source": [ 301 | "output_parser.parse(output)" 302 | ], 303 | "metadata": { 304 | "colab": { 305 | "base_uri": "https://localhost:8080/" 306 | }, 307 | "id": "u_97GW_GjBNQ", 308 | "outputId": "e6ae8ed1-a61a-4731-dc59-5614b2278828" 309 | }, 310 | "execution_count": 60, 311 | "outputs": [ 312 | { 313 | "output_type": "execute_result", 314 | "data": { 315 | "text/plain": [ 316 | "" 317 | ] 318 | }, 319 | "metadata": {}, 320 | "execution_count": 60 321 | } 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "source": [ 327 | "output_parser.parse('xyz')" 328 | ], 329 | "metadata": { 330 | "colab": { 331 | "base_uri": "https://localhost:8080/", 332 | "height": 452 333 | }, 334 | "id": "5lUJm3mSjGuP", 335 | "outputId": "b29b0cf1-ecc1-4f00-db73-78e5e672136f" 336 | }, 337 | "execution_count": 61, 338 | "outputs": [ 339 | { 340 | "output_type": "error", 341 | "ename": "OutputParserException", 342 | "evalue": "ignored", 343 | "traceback": [ 344 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 345 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 346 | "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain/output_parsers/enum.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m 
\u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 347 | "\u001b[0;32m/usr/lib/python3.10/enum.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(cls, value, names, module, qualname, type, start)\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnames\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# simple value lookup\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 385\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__new__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 386\u001b[0m \u001b[0;31m# otherwise, functional API: we're creating a new Enum type\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 348 | "\u001b[0;32m/usr/lib/python3.10/enum.py\u001b[0m in \u001b[0;36m__new__\u001b[0;34m(cls, value)\u001b[0m\n\u001b[1;32m 709\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mexc\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 710\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mve_exc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 711\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mexc\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 349 | "\u001b[0;31mValueError\u001b[0m: 'xyz' is not a valid Genders", 350 | "\nDuring handling of the above exception, another exception occurred:\n", 351 | "\u001b[0;31mOutputParserException\u001b[0m Traceback (most recent call last)", 352 | 
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0moutput_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'xyz'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 353 | "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain/output_parsers/enum.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m raise OutputParserException(\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0;34mf\"Response '{response}' is not one of the \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;34mf\"expected values: {self._valid_values}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 354 | "\u001b[0;31mOutputParserException\u001b[0m: Response 'xyz' is not one of the expected values: ['male', 'female']" 355 | ] 356 | } 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "source": [ 362 | "# StructuredOutputParser" 363 | ], 364 | "metadata": { 365 | "id": "VgYMfxcYjQ4E" 366 | } 367 | }, 368 | { 369 | "cell_type": "code", 370 | "source": [ 371 | "from langchain.output_parsers import StructuredOutputParser, ResponseSchema" 372 | ], 373 | "metadata": { 374 | "id": "bspETWUdjSnv" 375 | }, 376 | "execution_count": 62, 377 | "outputs": [] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "source": [ 382 | "response_schemas = [\n", 383 | " 
ResponseSchema(name=\"answer\", description=\"answer to the human's question\"),\n", 384 | " ResponseSchema(name=\"source\", description=\"source used to answer the human's question, should be a website.\")\n", 385 | "]\n", 386 | "output_parser = StructuredOutputParser.from_response_schemas(response_schemas)" 387 | ], 388 | "metadata": { 389 | "id": "pdozG2V4jcTO" 390 | }, 391 | "execution_count": 63, 392 | "outputs": [] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "source": [ 397 | "format_instructions = output_parser.get_format_instructions()\n", 398 | "prompt = PromptTemplate(\n", 399 | " template=\"answer the users question as best as possible.\\n{format_instructions}\\n{question}\",\n", 400 | " input_variables=[\"question\"],\n", 401 | " partial_variables={\"format_instructions\": format_instructions}\n", 402 | ")" 403 | ], 404 | "metadata": { 405 | "id": "vMp2DJvJjpP9" 406 | }, 407 | "execution_count": 64, 408 | "outputs": [] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "source": [ 413 | "print(format_instructions)" 414 | ], 415 | "metadata": { 416 | "colab": { 417 | "base_uri": "https://localhost:8080/" 418 | }, 419 | "id": "YHlRmZF3yF5C", 420 | "outputId": "72dcd806-df4e-40fb-f30d-5648749f1a5e" 421 | }, 422 | "execution_count": 65, 423 | "outputs": [ 424 | { 425 | "output_type": "stream", 426 | "name": "stdout", 427 | "text": [ 428 | "The output should be a markdown code snippet formatted in the following schema, including the leading and trailing \"```json\" and \"```\":\n", 429 | "\n", 430 | "```json\n", 431 | "{\n", 432 | "\t\"answer\": string // answer to the human's question\n", 433 | "\t\"source\": string // source used to answer the human's question, should be a website.\n", 434 | "}\n", 435 | "```\n" 436 | ] 437 | } 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "source": [ 443 | "_input = prompt.format_prompt(question=\"what are the ingredients of milk?\")\n", 444 | "output = llm(_input.to_string())\n", 445 | "\n", 446 | 
"print(output)" 447 | ], 448 | "metadata": { 449 | "colab": { 450 | "base_uri": "https://localhost:8080/" 451 | }, 452 | "id": "jdfxd85_jtil", 453 | "outputId": "a6bbc547-03c2-459b-bdab-8fbfced2e667" 454 | }, 455 | "execution_count": 66, 456 | "outputs": [ 457 | { 458 | "output_type": "stream", 459 | "name": "stdout", 460 | "text": [ 461 | "\n", 462 | "\n", 463 | "```json\n", 464 | "{\n", 465 | "\t\"answer\": \"Milk is typically made up of water, fat, proteins, lactose (sugar) and minerals.\",\n", 466 | "\t\"source\": \"https://www.dairynz.co.nz/nutrition/what-is-in-milk/\"\n", 467 | "}\n", 468 | "```\n" 469 | ] 470 | } 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "source": [ 476 | "json_data = output_parser.parse(output)" 477 | ], 478 | "metadata": { 479 | "id": "VHcXhoiUj1aG" 480 | }, 481 | "execution_count": 68, 482 | "outputs": [] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "source": [ 487 | "print(json_data['answer'])" 488 | ], 489 | "metadata": { 490 | "colab": { 491 | "base_uri": "https://localhost:8080/" 492 | }, 493 | "id": "n4cb1BQ2ypHU", 494 | "outputId": "7c9c99cf-bb3c-47a3-b7ec-c155e047e2aa" 495 | }, 496 | "execution_count": 69, 497 | "outputs": [ 498 | { 499 | "output_type": "stream", 500 | "name": "stdout", 501 | "text": [ 502 | "Milk is typically made up of water, fat, proteins, lactose (sugar) and minerals.\n" 503 | ] 504 | } 505 | ] 506 | } 507 | ] 508 | } -------------------------------------------------------------------------------- /LangChain_PDF_Chatbot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "attachments": {}, 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "该Python notebook利用langchain的QA 
chain,结合Chroma来实现PDF文档Analysis-and-Comparison-between-Optimism-and-StarkNet.pdf的语义化搜索。\n", 19 | "\n", 20 | "该PDF文档共61页。通过本notebook,我们演示该字数规模的文件的语义化索引的OpenAI API开销。\n", 21 | "\n", 22 | "使用时,在本地创建`.env`,并如`.env.example`所示,设置有效的OpenAI API Key即可。" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "id": "Nifwi9FrKb3g" 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "%pip install openai > /dev/null\n", 34 | "%pip install chromadb > /dev/null\n", 35 | "%pip install langchain > /dev/null" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "id": "5xgbUBve0LuN" 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "from langchain.document_loaders import PyMuPDFLoader" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "id": "UpOBdhBrdaiU" 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "PDF_NAME='Analysis-and-Comparison-between-Optimism-and-StarkNet.pdf'\n", 58 | "def load_pdf():\n", 59 | " return PyMuPDFLoader(PDF_NAME).load()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "id": "_-1itVpTY8Gz" 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "docs = load_pdf()" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "colab": { 78 | "base_uri": "https://localhost:8080/" 79 | }, 80 | "id": "33zAThYjY9gA", 81 | "outputId": "bc6d338b-82e3-461f-be11-04b4f44af8fb" 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "print (f'You have {len(docs)} document(s) in your data')\n", 86 | "print (f'There are {len(docs[0].page_content)} characters in the first page of your document')\n", 87 | "\n", 88 | "total = 0\n", 89 | "for doc in docs:\n", 90 | " total += len(doc.page_content)\n", 91 | "print (f'There are {total} characters in your document')" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | 
"metadata": { 98 | "id": "ziV20FzmZpm1" 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", 103 | "\n", 104 | "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", 105 | "split_docs = text_splitter.split_documents(docs)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "colab": { 113 | "base_uri": "https://localhost:8080/" 114 | }, 115 | "id": "WlDqqN_6Z08T", 116 | "outputId": "c0f4d8f4-2840-41cf-c3fd-dd8da6b9a9fa" 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "print (f'Now you have {len(split_docs)} documents')" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": { 127 | "id": "6IOlsXpbaIAw" 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "from langchain.embeddings.openai import OpenAIEmbeddings\n", 132 | "from langchain.vectorstores import Chroma\n", 133 | "import os\n", 134 | "\n", 135 | "OPENAI_API_KEY = os.environ[\"OPENAI_API_KEY\"]" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "id": "bE-KtYTCgkLw" 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "id": "hGdpt9LygkLy" 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "persist_directory = 'starknet'\n", 158 | "collection_name = 'starknet_index'" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "from langchain.callbacks import get_openai_callback" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "id": "1mQJJ8HRaf0I", 175 | "outputId": "d9343337-2df2-49dd-84d3-ac354ee65b7c" 176 | }, 177 | 
"outputs": [], 178 | "source": [ 179 | "with get_openai_callback() as cb:\n", 180 | " vectorstore = Chroma.from_documents(split_docs, embeddings, collection_name=collection_name, persist_directory=persist_directory)\n", 181 | " vectorstore.persist()\n", 182 | " print(cb)\n" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "id": "yBAx1_X-beQp" 190 | }, 191 | "outputs": [], 192 | "source": [ 193 | "from langchain.llms import OpenAI\n", 194 | "from langchain.chains.question_answering import load_qa_chain" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "id": "V8hds-zybhfc" 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)\n", 206 | "\n", 207 | "chain = load_qa_chain(llm, chain_type=\"stuff\")\n", 208 | "\n", 209 | "# Load the vectorstore from disk\n", 210 | "vectordb = Chroma(collection_name=collection_name, persist_directory=persist_directory, embedding_function=embeddings)\n", 211 | "\n", 212 | "query = \"What is starknet?\"\n", 213 | "docs = vectorstore.similarity_search(query, 3, include_metadata=True)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "print(chain.document_prompt)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "id": "K3SlreQ2haC4", 230 | "outputId": "417ebd17-6bd3-431f-8699-3b9e81cd8911" 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "for doc in docs:\n", 235 | " print(doc.metadata)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "print(chain.prompt_length(docs, question='What is starknet?'))" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 
251 | "id": "zdF03LqphOQY", 252 | "outputId": "bc596972-7e39-479a-a921-5b17204f4b7f" 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "with get_openai_callback() as cb:\n", 257 | " print(chain.run(input_documents=docs, question=query))\n", 258 | " print(cb)" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [] 265 | } 266 | ], 267 | "metadata": { 268 | "colab": { 269 | "include_colab_link": true, 270 | "provenance": [] 271 | }, 272 | "kernelspec": { 273 | "display_name": "Python 3", 274 | "name": "python3" 275 | }, 276 | "language_info": { 277 | "codemirror_mode": { 278 | "name": "ipython", 279 | "version": 3 280 | }, 281 | "file_extension": ".py", 282 | "mimetype": "text/x-python", 283 | "name": "python", 284 | "nbconvert_exporter": "python", 285 | "pygments_lexer": "ipython3", 286 | "version": "3.9.16" 287 | } 288 | }, 289 | "nbformat": 4, 290 | "nbformat_minor": 0 291 | } 292 | -------------------------------------------------------------------------------- /LangChain_TextSplitter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "gpuType": "T4", 8 | "authorship_tag": "ABX9TyPMVWfftf8OvHN6BWqQwq5N", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | }, 18 | "accelerator": "GPU" 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "source": [ 34 | "In this notebook, I will show you the main text splitters that the LangChain framework supports." 
35 | ], 36 | "metadata": { 37 | "id": "amP-lCFgKUb-" 38 | } 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 92, 43 | "metadata": { 44 | "id": "TO7WJgpwKLA-" 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "!pip install -qU langchain" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "source": [ 54 | "long_text = '''\n", 55 | "WASHINGTON (Reuters) -Former U.S. President Donald Trump faces 37 criminal counts including charges of unauthorized retention of classified documents and conspiracy to obstruct justice after leaving the White House in 2021, according to federal court documents made public on Friday.\n", 56 | "\n", 57 | "The Justice Department made the charging documents public on a tumultuous day in which two of Trump's lawyers quit the case and a former aide face charges as well.\n", 58 | "\n", 59 | "The charges stem from Trump's treatment of sensitive government materials he took with him when he left the White House in January 2021.\n", 60 | "\n", 61 | "He is due to make a first court appearance in the case in a Miami court on Tuesday, a day before his 77th birthday.\n", 62 | "\n", 63 | "The indictment of a former U.S. president on federal charges is unprecedented in American history and emerges at a time when Trump is the front-runner for the Republican presidential nomination next year.\n", 64 | "\n", 65 | "Investigators seized roughly 13,000 documents from Trump's Mar-a-Lago estate in Palm Beach, Florida, nearly a year ago. 
One hundred were marked as classified, even though one of Trump's lawyers had previously said all records with classified markings had been returned to the government.\n", 66 | "'''" 67 | ], 68 | "metadata": { 69 | "id": "vwX4O06HUSye" 70 | }, 71 | "execution_count": 93, 72 | "outputs": [] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "source": [ 77 | "# CharacterTextSplitter" 78 | ], 79 | "metadata": { 80 | "id": "CAO_cdXlJwf-" 81 | } 82 | }, 83 | { 84 | "cell_type": "code", 85 | "source": [ 86 | "from langchain.text_splitter import CharacterTextSplitter" 87 | ], 88 | "metadata": { 89 | "id": "ISg0Zv8yKfVi" 90 | }, 91 | "execution_count": 94, 92 | "outputs": [] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "source": [ 97 | "text_splitter = CharacterTextSplitter( \n", 98 | " separator = \"\\n\\n\",\n", 99 | " chunk_size = 50,\n", 100 | " chunk_overlap = 10,\n", 101 | " length_function = len,\n", 102 | ")\n", 103 | "\n", 104 | "documents = text_splitter.create_documents([long_text])\n", 105 | "print(documents[0].page_content)\n", 106 | "print(documents[1].page_content)" 107 | ], 108 | "metadata": { 109 | "colab": { 110 | "base_uri": "https://localhost:8080/" 111 | }, 112 | "id": "VciFcr6sUr94", 113 | "outputId": "3482f157-47db-4f9c-df46-bbe0de490218" 114 | }, 115 | "execution_count": 95, 116 | "outputs": [ 117 | { 118 | "output_type": "stream", 119 | "name": "stderr", 120 | "text": [ 121 | "WARNING:langchain.text_splitter:Created a chunk of size 284, which is longer than the specified 50\n", 122 | "WARNING:langchain.text_splitter:Created a chunk of size 163, which is longer than the specified 50\n", 123 | "WARNING:langchain.text_splitter:Created a chunk of size 136, which is longer than the specified 50\n", 124 | "WARNING:langchain.text_splitter:Created a chunk of size 115, which is longer than the specified 50\n", 125 | "WARNING:langchain.text_splitter:Created a chunk of size 204, which is longer than the specified 50\n" 126 | ] 127 | }, 128 | { 129 | 
"output_type": "stream", 130 | "name": "stdout", 131 | "text": [ 132 | "WASHINGTON (Reuters) -Former U.S. President Donald Trump faces 37 criminal counts including charges of unauthorized retention of classified documents and conspiracy to obstruct justice after leaving the White House in 2021, according to federal court documents made public on Friday.\n", 133 | "The Justice Department made the charging documents public on a tumultuous day in which two of Trump's lawyers quit the case and a former aide face charges as well.\n" 134 | ] 135 | } 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "source": [ 141 | "# RecursiveCharacterTextSplitter" 142 | ], 143 | "metadata": { 144 | "id": "zw2Wh3u_J0EV" 145 | } 146 | }, 147 | { 148 | "cell_type": "code", 149 | "source": [ 150 | "from langchain.text_splitter import RecursiveCharacterTextSplitter" 151 | ], 152 | "metadata": { 153 | "id": "i6dJm78aC6C_" 154 | }, 155 | "execution_count": 96, 156 | "outputs": [] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "source": [ 161 | "text_splitter = RecursiveCharacterTextSplitter(\n", 162 | " chunk_size = 50,\n", 163 | " chunk_overlap = 10,\n", 164 | " length_function = len,\n", 165 | " add_start_index = True\n", 166 | ")\n", 167 | "\n", 168 | "documents = text_splitter.create_documents([long_text])\n", 169 | "print(documents[0])\n", 170 | "print(documents[1])\n", 171 | "print(len(documents[1].page_content))" 172 | ], 173 | "metadata": { 174 | "colab": { 175 | "base_uri": "https://localhost:8080/" 176 | }, 177 | "id": "Ep6YoDaXC9rM", 178 | "outputId": "427eaf1f-a5a4-4b98-b157-27a9099a5fa3" 179 | }, 180 | "execution_count": 97, 181 | "outputs": [ 182 | { 183 | "output_type": "stream", 184 | "name": "stdout", 185 | "text": [ 186 | "page_content='WASHINGTON (Reuters) -Former U.S. 
President' metadata={'start_index': 1}\n", 187 | "page_content='President Donald Trump faces 37 criminal counts' metadata={'start_index': 35}\n", 188 | "47\n" 189 | ] 190 | } 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "source": [ 196 | "# TokenTextSplitter" 197 | ], 198 | "metadata": { 199 | "id": "o5k-2CspJ4kP" 200 | } 201 | }, 202 | { 203 | "cell_type": "code", 204 | "source": [ 205 | "!pip install tiktoken" 206 | ], 207 | "metadata": { 208 | "colab": { 209 | "base_uri": "https://localhost:8080/" 210 | }, 211 | "id": "rGeK2Vv6FRyr", 212 | "outputId": "3b7e9887-1b5a-416a-f8e4-a4748072cb41" 213 | }, 214 | "execution_count": 98, 215 | "outputs": [ 216 | { 217 | "output_type": "stream", 218 | "name": "stdout", 219 | "text": [ 220 | "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", 221 | "Requirement already satisfied: tiktoken in /usr/local/lib/python3.10/dist-packages (0.4.0)\n", 222 | "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2022.10.31)\n", 223 | "Requirement already satisfied: requests>=2.26.0 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2.27.1)\n", 224 | "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (1.26.15)\n", 225 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (2022.12.7)\n", 226 | "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (2.0.12)\n", 227 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.26.0->tiktoken) (3.4)\n" 228 | ] 229 | } 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "source": [ 235 | "from langchain.text_splitter import TokenTextSplitter" 236 | ], 237 | "metadata": { 238 | 
"id": "q9lZ0lfLFUZG" 239 | }, 240 | "execution_count": 99, 241 | "outputs": [] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "source": [ 246 | "text_splitter = TokenTextSplitter(chunk_size=50, chunk_overlap=0)" 247 | ], 248 | "metadata": { 249 | "id": "ARvJa2NZFNhf" 250 | }, 251 | "execution_count": 100, 252 | "outputs": [] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "source": [ 257 | "documents = text_splitter.create_documents([long_text])\n", 258 | "print(documents[0])" 259 | ], 260 | "metadata": { 261 | "colab": { 262 | "base_uri": "https://localhost:8080/" 263 | }, 264 | "id": "0iucjqxDFYSC", 265 | "outputId": "83845a19-8811-4865-8340-a9d7d512afb2" 266 | }, 267 | "execution_count": 101, 268 | "outputs": [ 269 | { 270 | "output_type": "stream", 271 | "name": "stdout", 272 | "text": [ 273 | "page_content='\\nWASHINGTON (Reuters) -Former U.S. President Donald Trump faces 37 criminal counts including charges of unauthorized retention of classified documents and conspiracy to obstruct justice after leaving the White House in 2021, according to federal court documents made public on Friday.\\n' metadata={}\n" 274 | ] 275 | } 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "source": [ 281 | "print(documents[1])" 282 | ], 283 | "metadata": { 284 | "colab": { 285 | "base_uri": "https://localhost:8080/" 286 | }, 287 | "id": "KvIK0pgdYO00", 288 | "outputId": "a73fcf64-c70d-44e3-b670-d6e5a61baf5a" 289 | }, 290 | "execution_count": 102, 291 | "outputs": [ 292 | { 293 | "output_type": "stream", 294 | "name": "stdout", 295 | "text": [ 296 | "page_content=\"\\nThe Justice Department made the charging documents public on a tumultuous day in which two of Trump's lawyers quit the case and a former aide face charges as well.\\n\\nThe charges stem from Trump's treatment of sensitive government materials he took with him when\" metadata={}\n" 297 | ] 298 | } 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "source": [ 304 | "import tiktoken\n", 305 | 
"enc = tiktoken.get_encoding(\"gpt2\")\n", 306 | "print(len(enc.encode(documents[0].page_content)))\n", 307 | "print(len(enc.encode(documents[1].page_content)))\n", 308 | "print(len(enc.encode(documents[2].page_content)))" 309 | ], 310 | "metadata": { 311 | "colab": { 312 | "base_uri": "https://localhost:8080/" 313 | }, 314 | "id": "413dzpLfFymc", 315 | "outputId": "bff7c8bc-e3db-4a7b-8a1e-ce7058fd6e1e" 316 | }, 317 | "execution_count": 103, 318 | "outputs": [ 319 | { 320 | "output_type": "stream", 321 | "name": "stdout", 322 | "text": [ 323 | "50\n", 324 | "50\n", 325 | "50\n" 326 | ] 327 | } 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "source": [ 333 | "print(enc.encode(documents[0].page_content))" 334 | ], 335 | "metadata": { 336 | "colab": { 337 | "base_uri": "https://localhost:8080/" 338 | }, 339 | "id": "ZRGx3pdWZJuh", 340 | "outputId": "db790f9c-430b-472a-a95c-6c14943787ae" 341 | }, 342 | "execution_count": 104, 343 | "outputs": [ 344 | { 345 | "output_type": "stream", 346 | "name": "stdout", 347 | "text": [ 348 | "[198, 21793, 357, 12637, 8, 532, 14282, 471, 13, 50, 13, 1992, 3759, 1301, 6698, 5214, 4301, 9853, 1390, 4530, 286, 22959, 21545, 286, 10090, 4963, 290, 10086, 284, 26520, 5316, 706, 4305, 262, 2635, 2097, 287, 33448, 11, 1864, 284, 2717, 2184, 4963, 925, 1171, 319, 3217, 13, 198]\n" 349 | ] 350 | } 351 | ] 352 | } 353 | ] 354 | } -------------------------------------------------------------------------------- /Langchain_Memory_Persistent_Store.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyMSoUMKic3BwTFLpQ1YCwEm", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | 
"metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "source": [ 32 | "# 简介\n", 33 | "\n", 34 | "该Python Notebook演示LangChain框架所提供的Memory持久化能力。LangChain通过langchain.memory.chat_message_histories包中提供的一系列组件支持多种形式的历史消息存储,包括文件,数据库等。\n", 35 | "\n", 36 | "本示例演示文件存储形式的持久化。" 37 | ], 38 | "metadata": { 39 | "id": "q0ApeCaVrZhV" 40 | } 41 | }, 42 | { 43 | "cell_type": "code", 44 | "source": [ 45 | "!pip install langchain openai --quiet --upgrade" 46 | ], 47 | "metadata": { 48 | "id": "RdSvP1Oi7LCc" 49 | }, 50 | "execution_count": 23, 51 | "outputs": [] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 24, 56 | "metadata": { 57 | "id": "CcSMVHBz6zrj" 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "from langchain.llms import OpenAI\n", 62 | "from langchain.chains import ConversationChain\n", 63 | "from langchain.callbacks import get_openai_callback\n", 64 | "from langchain.memory import ConversationBufferMemory" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "source": [ 70 | "1. 
准备**OpenAI API Key**,**track_tokens_usage**辅助函数(调用`chain.run`函数,并统计token开销),以及LLM实例" 71 | ], 72 | "metadata": { 73 | "id": "wVqmb_UYrif9" 74 | } 75 | }, 76 | { 77 | "cell_type": "code", 78 | "source": [ 79 | "import os\n", 80 | "\n", 81 | "os.environ['OPENAI_API_KEY'] = 'your openai api key'" 82 | ], 83 | "metadata": { 84 | "id": "E2xdWZRTiKWV" 85 | }, 86 | "execution_count": 26, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "source": [ 92 | "def track_tokens_usage(chain, query):\n", 93 | " with get_openai_callback() as cb:\n", 94 | " result = chain.run(query)\n", 95 | " print(f'Total tokens: {cb.total_tokens}')\n", 96 | "\n", 97 | " return result\n" 98 | ], 99 | "metadata": { 100 | "id": "yozbiUy2_RCt" 101 | }, 102 | "execution_count": 27, 103 | "outputs": [] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "source": [ 108 | "llm = OpenAI(\n", 109 | " temperature=0,\n", 110 | "\topenai_api_key=os.environ[\"OPENAI_API_KEY\"],\n", 111 | "\tmodel_name=\"text-davinci-003\"\n", 112 | ")\n" 113 | ], 114 | "metadata": { 115 | "id": "akq_I-dn-hpd" 116 | }, 117 | "execution_count": 28, 118 | "outputs": [] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "source": [ 123 | "2. 
使用Memory组件**ConversationBufferMemory**的默认配置" 124 | ], 125 | "metadata": { 126 | "id": "EU9QeEcZr-x1" 127 | } 128 | }, 129 | { 130 | "cell_type": "code", 131 | "source": [ 132 | "conversation = ConversationChain(llm=llm, memory = ConversationBufferMemory())" 133 | ], 134 | "metadata": { 135 | "id": "czgOmwfsr9Y7" 136 | }, 137 | "execution_count": 29, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "source": [ 143 | "print(conversation.prompt.template)" 144 | ], 145 | "metadata": { 146 | "id": "pEsc6-AF-riz", 147 | "colab": { 148 | "base_uri": "https://localhost:8080/" 149 | }, 150 | "outputId": "0de9b559-782e-49ea-bf4f-b1e437eb7f1e" 151 | }, 152 | "execution_count": 30, 153 | "outputs": [ 154 | { 155 | "output_type": "stream", 156 | "name": "stdout", 157 | "text": [ 158 | "The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n", 159 | "\n", 160 | "Current conversation:\n", 161 | "{history}\n", 162 | "Human: {input}\n", 163 | "AI:\n" 164 | ] 165 | } 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "source": [ 171 | "track_tokens_usage(conversation, \"What is Langchain?\")\n" 172 | ], 173 | "metadata": { 174 | "id": "rM_qZkMT_nw7", 175 | "colab": { 176 | "base_uri": "https://localhost:8080/", 177 | "height": 73 178 | }, 179 | "outputId": "cfc70405-94b9-4fb1-9229-0849811e01e1" 180 | }, 181 | "execution_count": 31, 182 | "outputs": [ 183 | { 184 | "output_type": "stream", 185 | "name": "stdout", 186 | "text": [ 187 | "Total tokens: 110\n" 188 | ] 189 | }, 190 | { 191 | "output_type": "execute_result", 192 | "data": { 193 | "text/plain": [ 194 | "' Langchain is a blockchain-based language learning platform that allows users to learn new languages in a secure and decentralized environment. 
It uses a combination of blockchain technology and artificial intelligence to provide users with an immersive language learning experience.'" 195 | ], 196 | "application/vnd.google.colaboratory.intrinsic+json": { 197 | "type": "string" 198 | } 199 | }, 200 | "metadata": {}, 201 | "execution_count": 31 202 | } 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "source": [ 208 | "3. 引入消息历史持久化组件" 209 | ], 210 | "metadata": { 211 | "id": "OnAq2L8rsLLy" 212 | } 213 | }, 214 | { 215 | "cell_type": "code", 216 | "source": [ 217 | "from langchain.memory.chat_message_histories import FileChatMessageHistory" 218 | ], 219 | "metadata": { 220 | "id": "TymjTQMWiXAx" 221 | }, 222 | "execution_count": 32, 223 | "outputs": [] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "source": [ 228 | "message_history = FileChatMessageHistory(file_path = 'conversation_20230620.txt')\n", 229 | "\n", 230 | "memory = ConversationBufferMemory(chat_memory=message_history)" 231 | ], 232 | "metadata": { 233 | "id": "EL-gMHvdjLHD" 234 | }, 235 | "execution_count": 33, 236 | "outputs": [] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "source": [ 241 | "conversation = ConversationChain(llm=llm, memory = memory)" 242 | ], 243 | "metadata": { 244 | "id": "9K799NAZjVUT" 245 | }, 246 | "execution_count": 34, 247 | "outputs": [] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "source": [ 252 | "track_tokens_usage(conversation, \"How does ChatGPT enable coherent conversation?\")" 253 | ], 254 | "metadata": { 255 | "colab": { 256 | "base_uri": "https://localhost:8080/", 257 | "height": 73 258 | }, 259 | "id": "D1kDz1EjjYcr", 260 | "outputId": "bdd9c106-7999-48f8-e4f1-815c13925bc7" 261 | }, 262 | "execution_count": 35, 263 | "outputs": [ 264 | { 265 | "output_type": "stream", 266 | "name": "stdout", 267 | "text": [ 268 | "Total tokens: 133\n" 269 | ] 270 | }, 271 | { 272 | "output_type": "execute_result", 273 | "data": { 274 | "text/plain": [ 275 | "' ChatGPT is a natural language 
processing (NLP) model that enables coherent conversation by understanding the context of a conversation and providing relevant responses. It uses a transformer-based architecture to generate responses that are based on the context of the conversation. It also uses a large corpus of data to generate more accurate responses.'" 276 | ], 277 | "application/vnd.google.colaboratory.intrinsic+json": { 278 | "type": "string" 279 | } 280 | }, 281 | "metadata": {}, 282 | "execution_count": 35 283 | } 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "source": [ 289 | "!cat conversation_20230620.txt" 290 | ], 291 | "metadata": { 292 | "colab": { 293 | "base_uri": "https://localhost:8080/" 294 | }, 295 | "id": "U8rrP-T3kOS0", 296 | "outputId": "c03707e3-0a90-4e2b-bd47-571372d1f148" 297 | }, 298 | "execution_count": 36, 299 | "outputs": [ 300 | { 301 | "output_type": "stream", 302 | "name": "stdout", 303 | "text": [ 304 | "[{\"type\": \"human\", \"data\": {\"content\": \"How does ChatGPT enable coherent conversation?\", \"additional_kwargs\": {}, \"example\": false}}, {\"type\": \"ai\", \"data\": {\"content\": \" ChatGPT is a natural language processing (NLP) model that enables coherent conversation by understanding the context of a conversation and providing relevant responses. It uses a transformer-based architecture to generate responses that are based on the context of the conversation. 
It also uses a large corpus of data to generate more accurate responses.\", \"additional_kwargs\": {}, \"example\": false}}]" 305 | ] 306 | } 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "source": [ 312 | "track_tokens_usage(conversation, \"Bye now!\")" 313 | ], 314 | "metadata": { 315 | "colab": { 316 | "base_uri": "https://localhost:8080/", 317 | "height": 54 318 | }, 319 | "id": "oPBsl3jUyNhP", 320 | "outputId": "a2e54fdc-c9ca-4c2f-e67f-9c159552bf36" 321 | }, 322 | "execution_count": 37, 323 | "outputs": [ 324 | { 325 | "output_type": "stream", 326 | "name": "stdout", 327 | "text": [ 328 | "Total tokens: 156\n" 329 | ] 330 | }, 331 | { 332 | "output_type": "execute_result", 333 | "data": { 334 | "text/plain": [ 335 | "' Bye! It was nice talking to you. Have a great day!'" 336 | ], 337 | "application/vnd.google.colaboratory.intrinsic+json": { 338 | "type": "string" 339 | } 340 | }, 341 | "metadata": {}, 342 | "execution_count": 37 343 | } 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "source": [ 349 | "!cat conversation_20230620.txt" 350 | ], 351 | "metadata": { 352 | "colab": { 353 | "base_uri": "https://localhost:8080/" 354 | }, 355 | "id": "mjGRBf5DyO7I", 356 | "outputId": "557083c1-4c41-4515-b685-90e0b269d153" 357 | }, 358 | "execution_count": 38, 359 | "outputs": [ 360 | { 361 | "output_type": "stream", 362 | "name": "stdout", 363 | "text": [ 364 | "[{\"type\": \"human\", \"data\": {\"content\": \"How does ChatGPT enable coherent conversation?\", \"additional_kwargs\": {}, \"example\": false}}, {\"type\": \"ai\", \"data\": {\"content\": \" ChatGPT is a natural language processing (NLP) model that enables coherent conversation by understanding the context of a conversation and providing relevant responses. It uses a transformer-based architecture to generate responses that are based on the context of the conversation. 
It also uses a large corpus of data to generate more accurate responses.\", \"additional_kwargs\": {}, \"example\": false}}, {\"type\": \"human\", \"data\": {\"content\": \"Bye now!\", \"additional_kwargs\": {}, \"example\": false}}, {\"type\": \"ai\", \"data\": {\"content\": \" Bye! It was nice talking to you. Have a great day!\", \"additional_kwargs\": {}, \"example\": false}}]" 365 | ] 366 | } 367 | ] 368 | } 369 | ] 370 | } -------------------------------------------------------------------------------- /OpenAI_Chat_Completions_16k.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyNo7/eDIuUXulqI4aKGLXrI", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "source": [ 32 | "# OpenAI new chat model gpt-3.5-turbo-16k\n", 33 | "\n", 34 | "gpt-3.5-turbo-16k offers 4 times the context length of gpt-3.5-turbo at twice the price: $0.003/1K input tokens and $0.004/1K output tokens.\n", 35 | "\n", 36 | "16k context means the model can now support ~20 pages of text in a single request." 
37 | ], 38 | "metadata": { 39 | "id": "BW1vmhBS4fKN" 40 | } 41 | }, 42 | { 43 | "cell_type": "code", 44 | "source": [ 45 | "!pip install tiktoken langchain openai --quiet --upgrade" 46 | ], 47 | "metadata": { 48 | "id": "EbRu-u0F4SaZ" 49 | }, 50 | "execution_count": 31, 51 | "outputs": [] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "source": [ 56 | "import os\n", 57 | "import openai\n", 58 | "\n", 59 | "openai.api_key = \"Your OpenAI API Key\"" 60 | ], 61 | "metadata": { 62 | "id": "QuGN31Hi4Ghh" 63 | }, 64 | "execution_count": 32, 65 | "outputs": [] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 33, 70 | "metadata": { 71 | "colab": { 72 | "base_uri": "https://localhost:8080/" 73 | }, 74 | "id": "_S3w-_dYLLyh", 75 | "outputId": "59bef309-1f5f-440c-ff19-5bbdee442aee" 76 | }, 77 | "outputs": [ 78 | { 79 | "output_type": "stream", 80 | "name": "stdout", 81 | "text": [ 82 | "--2023-06-15 21:20:37-- https://uniswap.org/whitepaper-v3.pdf\n", 83 | "Resolving uniswap.org (uniswap.org)... 104.18.23.54, 104.18.22.54, 2606:4700::6812:1736, ...\n", 84 | "Connecting to uniswap.org (uniswap.org)|104.18.23.54|:443... connected.\n", 85 | "HTTP request sent, awaiting response... 
200 OK\n", 86 | "Length: 1500865 (1.4M) [application/pdf]\n", 87 | "Saving to: ‘whitepaper-v3.pdf.1’\n", 88 | "\n", 89 | "\rwhitepaper-v3.pdf.1 0%[ ] 0 --.-KB/s \rwhitepaper-v3.pdf.1 100%[===================>] 1.43M --.-KB/s in 0.02s \n", 90 | "\n", 91 | "2023-06-15 21:20:37 (65.6 MB/s) - ‘whitepaper-v3.pdf.1’ saved [1500865/1500865]\n", 92 | "\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "!wget https://uniswap.org/whitepaper-v3.pdf" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "source": [ 103 | "!ls -alt whitepaper-v3.pdf" 104 | ], 105 | "metadata": { 106 | "colab": { 107 | "base_uri": "https://localhost:8080/" 108 | }, 109 | "id": "9FhyJVTwLVsu", 110 | "outputId": "48e7c8d6-bbfd-4d09-e3fd-60f0a4dd88de" 111 | }, 112 | "execution_count": 34, 113 | "outputs": [ 114 | { 115 | "output_type": "stream", 116 | "name": "stdout", 117 | "text": [ 118 | "-rw-r--r-- 1 root root 1500865 Jun 15 20:28 whitepaper-v3.pdf\n" 119 | ] 120 | } 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "source": [ 126 | "import tiktoken\n", 127 | "\n", 128 | "encoding = tiktoken.encoding_for_model('gpt-3.5-turbo-16k')\n", 129 | "encoding" 130 | ], 131 | "metadata": { 132 | "colab": { 133 | "base_uri": "https://localhost:8080/" 134 | }, 135 | "id": "SWdGZYFJs9rL", 136 | "outputId": "9b50cf76-7bfe-4005-9272-3d9856e37137" 137 | }, 138 | "execution_count": 35, 139 | "outputs": [ 140 | { 141 | "output_type": "execute_result", 142 | "data": { 143 | "text/plain": [ 144 | "" 145 | ] 146 | }, 147 | "metadata": {}, 148 | "execution_count": 35 149 | } 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "source": [ 155 | "encoding.encode(\"tiktoken is great!\")" 156 | ], 157 | "metadata": { 158 | "colab": { 159 | "base_uri": "https://localhost:8080/" 160 | }, 161 | "id": "_4R-IPTdtTep", 162 | "outputId": "a37a348d-b911-4797-dec4-032002ed8043" 163 | }, 164 | "execution_count": 36, 165 | "outputs": [ 166 | { 167 | "output_type": "execute_result", 168 | "data": { 169 | "text/plain": 
[ 170 | "[83, 1609, 5963, 374, 2294, 0]" 171 | ] 172 | }, 173 | "metadata": {}, 174 | "execution_count": 36 175 | } 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "source": [ 181 | "!pip install PyPDF2 pymupdf --quiet" 182 | ], 183 | "metadata": { 184 | "id": "2F2s9bLpuXXK" 185 | }, 186 | "execution_count": null, 187 | "outputs": [] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "source": [ 192 | "from langchain.document_loaders import PyMuPDFLoader\n", 193 | "\n", 194 | "pages = PyMuPDFLoader('whitepaper-v3.pdf').load()\n", 195 | "\n", 196 | "all_content = ''\n", 197 | "for page in pages:\n", 198 | " all_content += page.page_content + \"\\n\"" 199 | ], 200 | "metadata": { 201 | "id": "d7QDq7P1ubQw" 202 | }, 203 | "execution_count": 37, 204 | "outputs": [] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "source": [ 209 | "print(len(all_content))" 210 | ], 211 | "metadata": { 212 | "colab": { 213 | "base_uri": "https://localhost:8080/" 214 | }, 215 | "id": "5SeDEippv4wM", 216 | "outputId": "ccb48270-6280-40ed-8a86-8e876c42a036" 217 | }, 218 | "execution_count": 38, 219 | "outputs": [ 220 | { 221 | "output_type": "stream", 222 | "name": "stdout", 223 | "text": [ 224 | "41711\n" 225 | ] 226 | } 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "source": [ 232 | "tokens = encoding.encode(all_content)\n", 233 | "len(tokens)" 234 | ], 235 | "metadata": { 236 | "colab": { 237 | "base_uri": "https://localhost:8080/" 238 | }, 239 | "id": "jnXy3800wU4d", 240 | "outputId": "f3601eb2-a3ee-47f9-e9bb-b8306e634ae3" 241 | }, 242 | "execution_count": 39, 243 | "outputs": [ 244 | { 245 | "output_type": "execute_result", 246 | "data": { 247 | "text/plain": [ 248 | "12377" 249 | ] 250 | }, 251 | "metadata": {}, 252 | "execution_count": 39 253 | } 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "source": [ 259 | "completion = openai.ChatCompletion.create(\n", 260 | " model=\"gpt-3.5-turbo-16k\",\n", 261 | " messages = [\n", 262 | " {\"role\": 
\"system\", \"content\" : \"You are a chatbot which can answer questions based on the paper user supplies.\"},\n", 263 | " {\"role\": \"user\", \"content\" : \"I have the following whitepaper of uniswap v3\"},\n", 264 | " {\"role\": \"user\", \"content\" : all_content},\n", 265 | " {\"role\": \"user\", \"content\" : \"What is liquidity oracle?\"}\n", 266 | " ]\n", 267 | ")\n", 268 | "print(completion)" 269 | ], 270 | "metadata": { 271 | "colab": { 272 | "base_uri": "https://localhost:8080/" 273 | }, 274 | "id": "Kk2kQK2Gwag5", 275 | "outputId": "dcc5d468-e2b7-4623-ef91-02c9ce4bc2ce" 276 | }, 277 | "execution_count": 44, 278 | "outputs": [ 279 | { 280 | "output_type": "stream", 281 | "name": "stdout", 282 | "text": [ 283 | "{\n", 284 | " \"id\": \"chatcmpl-7Rope0Wps0WHHCnQDAw1ZIZzQbWtM\",\n", 285 | " \"object\": \"chat.completion\",\n", 286 | " \"created\": 1686864610,\n", 287 | " \"model\": \"gpt-3.5-turbo-16k-0613\",\n", 288 | " \"choices\": [\n", 289 | " {\n", 290 | " \"index\": 0,\n", 291 | " \"message\": {\n", 292 | " \"role\": \"assistant\",\n", 293 | " \"content\": \"A liquidity oracle is a mechanism that provides information about the available liquidity in a particular market or trading pair. In the context of decentralized exchanges (DEXs) like Uniswap, a liquidity oracle provides data on the depth and volume of liquidity available for different token pairs.\\n\\nThe liquidity oracle collects and aggregates data from various liquidity sources, such as liquidity providers and market makers, to calculate the overall liquidity in a specific trading pair. It helps traders and users of DEXs to make informed decisions by providing insights into the liquidity landscape.\\n\\nThe data provided by a liquidity oracle can be used in various ways, including calculating slippage for trades, determining price impact, and assessing the overall health and stability of a market. 
Liquidity oracles play a crucial role in ensuring that users have access to reliable liquidity information, which is essential for the efficient functioning of decentralized exchanges.\"\n", 294 | " },\n", 295 | " \"finish_reason\": \"stop\"\n", 296 | " }\n", 297 | " ],\n", 298 | " \"usage\": {\n", 299 | " \"prompt_tokens\": 12429,\n", 300 | " \"completion_tokens\": 173,\n", 301 | " \"total_tokens\": 12602\n", 302 | " }\n", 303 | "}\n" 304 | ] 305 | } 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "source": [ 311 | "completion['choices'][0]['message']['content']" 312 | ], 313 | "metadata": { 314 | "colab": { 315 | "base_uri": "https://localhost:8080/", 316 | "height": 109 317 | }, 318 | "id": "yHNo3vqpxwqw", 319 | "outputId": "1ec59f1b-5f37-4ce6-9461-0beb58350b0f" 320 | }, 321 | "execution_count": 42, 322 | "outputs": [ 323 | { 324 | "output_type": "execute_result", 325 | "data": { 326 | "text/plain": [ 327 | "'A liquidity oracle is a mechanism or system that provides information about the liquidity available in a specific market or trading pair. It helps users and smart contracts determine the depth and availability of liquidity for trading or other purposes.\\n\\nIn the context of Uniswap v3, the liquidity oracle is a time-weighted average liquidity oracle. It provides information about the average liquidity for a given pair of tokens over a specific period of time. 
It allows users to query the recent liquidity accumulator values, eliminating the need to checkpoint the accumulator value at the exact beginning and end of the period for which a time-weighted average liquidity is being measured.\\n\\nThe liquidity oracle provides valuable information for various purposes, such as optimizing trading strategies, determining slippage, or making decisions related to liquidity provisioning and mining programs.'" 328 | ], 329 | "application/vnd.google.colaboratory.intrinsic+json": { 330 | "type": "string" 331 | } 332 | }, 333 | "metadata": {}, 334 | "execution_count": 42 335 | } 336 | ] 337 | } 338 | ] 339 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LangChain Tutorials 2 | -------------------------------------------------------------------------------- /StreamChat/app.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | from langchain.chat_models import ChatOpenAI 3 | from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler 4 | from langchain.schema import HumanMessage 5 | import asyncio 6 | import sys 7 | 8 | load_dotenv() 9 | handler = AsyncIteratorCallbackHandler() 10 | llm = ChatOpenAI(streaming=True, callbacks=[handler], temperature=0) 11 | 12 | async def consumer(): 13 | iterator = handler.aiter() 14 | async for item in iterator: 15 | sys.stdout.write(item) 16 | sys.stdout.flush() 17 | 18 | if __name__ == '__main__': 19 | message = "What is AI?" 
20 | loop = asyncio.get_event_loop() 21 | loop.create_task(llm.agenerate(messages=[[HumanMessage(content=message)]])) 22 | loop.create_task(consumer()) 23 | loop.run_forever() 24 | loop.close() -------------------------------------------------------------------------------- /StreamChat/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.4 2 | aiosignal==1.3.1 3 | altair==5.0.1 4 | anyio==3.7.0 5 | async-timeout==4.0.2 6 | attrs==23.1.0 7 | blinker==1.6.2 8 | cachetools==5.3.1 9 | certifi==2023.5.7 10 | charset-normalizer==3.1.0 11 | click==8.1.3 12 | dataclasses-json==0.5.8 13 | decorator==5.1.1 14 | exceptiongroup==1.1.1 15 | fastapi==0.98.0 16 | frozenlist==1.3.3 17 | gitdb==4.0.10 18 | GitPython==3.1.31 19 | greenlet==2.0.2 20 | h11==0.14.0 21 | idna==3.4 22 | importlib-metadata==6.7.0 23 | Jinja2==3.1.2 24 | jsonschema==4.17.3 25 | langchain==0.0.209 26 | langchainplus-sdk==0.0.16 27 | markdown-it-py==3.0.0 28 | MarkupSafe==2.1.3 29 | marshmallow==3.19.0 30 | marshmallow-enum==1.5.1 31 | mdurl==0.1.2 32 | multidict==6.0.4 33 | mypy-extensions==1.0.0 34 | numexpr==2.8.4 35 | numpy==1.25.0 36 | openai==0.27.8 37 | openapi-schema-pydantic==1.2.4 38 | packaging==23.1 39 | pandas==2.0.2 40 | Pillow==9.5.0 41 | protobuf==4.23.3 42 | pyarrow==12.0.1 43 | pydantic==1.10.9 44 | pydeck==0.8.1b0 45 | Pygments==2.15.1 46 | Pympler==1.0.1 47 | pyrsistent==0.19.3 48 | python-dateutil==2.8.2 49 | python-dotenv==1.0.0 50 | pytz==2023.3 51 | pytz-deprecation-shim==0.1.0.post0 52 | PyYAML==6.0 53 | requests==2.31.0 54 | rich==13.4.2 55 | six==1.16.0 56 | smmap==5.0.0 57 | sniffio==1.3.0 58 | SQLAlchemy==2.0.16 59 | starlette==0.27.0 60 | tenacity==8.2.2 61 | toml==0.10.2 62 | toolz==0.12.0 63 | tornado==6.3.2 64 | tqdm==4.65.0 65 | typing-inspect==0.9.0 66 | typing_extensions==4.6.3 67 | tzdata==2023.3 68 | tzlocal==4.3.1 69 | urllib3==2.0.3 70 | uvicorn==0.22.0 71 | validators==0.20.0 72 | yarl==1.9.2 73 | 
zipp==3.15.0 74 | -------------------------------------------------------------------------------- /StreamChat/server.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import uvicorn 3 | 4 | from typing import AsyncIterable, Awaitable 5 | from dotenv import load_dotenv 6 | from fastapi import FastAPI 7 | from fastapi.responses import FileResponse, StreamingResponse 8 | from langchain.callbacks import AsyncIteratorCallbackHandler 9 | from langchain.chat_models import ChatOpenAI 10 | from langchain.schema import HumanMessage 11 | 12 | load_dotenv() 13 | async def wait_done(fn: Awaitable, event: asyncio.Event): 14 | try: 15 | await fn 16 | except Exception as e: 17 | print(e) 18 | event.set() 19 | finally: 20 | event.set() 21 | 22 | async def call_openai(question: str) -> AsyncIterable[str]: 23 | callback = AsyncIteratorCallbackHandler() 24 | model = ChatOpenAI(streaming=True, verbose=True, callbacks=[callback]) 25 | 26 | coroutine = wait_done(model.agenerate(messages=[[HumanMessage(content=question)]]), callback.done) 27 | task = asyncio.create_task(coroutine) 28 | 29 | async for token in callback.aiter(): 30 | yield f"{token}" 31 | 32 | await task 33 | 34 | 35 | app = FastAPI() 36 | 37 | @app.post("/ask") 38 | def ask(body: dict): 39 | return StreamingResponse(call_openai(body['question']), media_type="text/event-stream") 40 | 41 | @app.get("/") 42 | async def homepage(): 43 | return FileResponse('statics/index.html') 44 | 45 | if __name__ == "__main__": 46 | uvicorn.run(host="0.0.0.0", port=8888, app=app) -------------------------------------------------------------------------------- /StreamChat/statics/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Streaming Chat 5 | 6 | 7 | 8 | 9 | 10 |
11 |

Streaming Chat

12 |
13 |
14 | 17 | 20 |
21 |
22 |
23 |

{{ answer }}

24 |
25 |
26 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /coffee-roll.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sugarforever/LangChain-Tutorials/32f0abc3cebca46e583bc70fe2737ea367151e11/coffee-roll.jpg -------------------------------------------------------------------------------- /expression-language/LangChain_Expression_03_Router.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyM+98apepmO9YHdeIxM5wlu", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "source": [ 32 | "# 03 路由(Router)\n", 33 | "\n", 34 | "`LEL` ( `LangChain Expression Language` ) 实现了路由机制,支持在应用开发中根据业务需要将请求转发给指定的链。\n", 35 | "\n", 36 | "核心类:`RouterRunnable`" 37 | ], 38 | "metadata": { 39 | "id": "OHpaChk4b5UN" 40 | } 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "source": [ 45 | "## 准备环境\n", 46 | "\n", 47 | "安装必要的 `python` 包。" 48 | ], 49 | "metadata": { 50 | "id": "IAzbVA50cVt1" 51 | } 52 | }, 53 | { 54 | "cell_type": "code", 55 | "source": [ 56 | "!pip install -q -U langchain openai" 57 | ], 58 | "metadata": { 59 | "id": "MZWl8x3dmjsV" 60 | }, 61 | "execution_count": 27, 62 | "outputs": [] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "source": [ 67 | "from langchain.prompts import ChatPromptTemplate\n", 68 | "from langchain.schema.runnable import RunnableMap, Runnable, RunnableConfig, RunnablePassthrough, Input\n", 69 | "from langchain.load.serializable 
import Serializable\n", 70 | "from langchain.prompts import ChatPromptTemplate\n", 71 | "from langchain.chat_models import ChatOpenAI\n", 72 | "from langchain.schema import StrOutputParser\n", 73 | "from operator import itemgetter\n", 74 | "from typing import Optional, Dict" 75 | ], 76 | "metadata": { 77 | "id": "pYpbezvYwzxU" 78 | }, 79 | "execution_count": 30, 80 | "outputs": [] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "source": [ 85 | "## 一个有用的调试类 `StdOutputRunnable`\n", 86 | "\n", 87 | "类 `StdOutputRunnable` 的功能与 `RunnablePassthrough` 几乎一致,唯一的区别是打印出输入的数据。这非常适合于 `LEL` 的调试。将其加到管道中,可以查看所在环节的输入值,帮助调试。" 88 | ], 89 | "metadata": { 90 | "id": "yf8R7S7q3Dll" 91 | } 92 | }, 93 | { 94 | "cell_type": "code", 95 | "source": [ 96 | "class StdOutputRunnable(Serializable, Runnable[Input, Input]):\n", 97 | " @property\n", 98 | " def lc_serializable(self) -> bool:\n", 99 | " return True\n", 100 | "\n", 101 | " def invoke(self, input: Dict, config: Optional[RunnableConfig] = None) -> Input:\n", 102 | " print(input);\n", 103 | " return self._call_with_config(lambda x: x, input, config)\n" 104 | ], 105 | "metadata": { 106 | "id": "xkgc0euJ0Dnz" 107 | }, 108 | "execution_count": 24, 109 | "outputs": [] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "source": [ 114 | "### RouterRunnable 示例" 115 | ], 116 | "metadata": { 117 | "id": "YjY2hgpq37xR" 118 | } 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "source": [ 123 | "模型辅助类,在分类任务执行时,指定允许的分类值。\n", 124 | "\n", 125 | "`create_tagging_chain_pydantic` 创建基于 `Pydantic` schema定义的分类链,请参考 [API文档](https://api.python.langchain.com/en/latest/chains/langchain.chains.openai_functions.tagging.create_tagging_chain_pydantic.html)" 126 | ], 127 | "metadata": { 128 | "id": "fcizgwUh4EBZ" 129 | } 130 | }, 131 | { 132 | "cell_type": "code", 133 | "source": [ 134 | "import os\n", 135 | "\n", 136 | "os.environ['OPENAI_API_KEY'] = \"您的有效openai api key\"" 137 | ], 138 | "metadata": { 139 | "id": "oYHp-mBk4dO5" 140 | }, 141 | 
"execution_count": 29, 142 | "outputs": [] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "source": [ 147 | "from langchain.chains import create_tagging_chain_pydantic\n", 148 | "from pydantic import BaseModel, Field\n", 149 | "\n", 150 | "class ChainToUse(BaseModel):\n", 151 | " \"\"\"Used to determine which chain to serve the user.\"\"\"\n", 152 | "\n", 153 | " name: str = Field(description=\"Should be one of `color` or `fruit`\")\n", 154 | "\n", 155 | "tagger = create_tagging_chain_pydantic(ChainToUse, ChatOpenAI(temperature=0))" 156 | ], 157 | "metadata": { 158 | "id": "2RkcxjTV3hW4" 159 | }, 160 | "execution_count": 40, 161 | "outputs": [] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "source": [ 166 | "model = ChatOpenAI()" 167 | ], 168 | "metadata": { 169 | "id": "kQ-MyoMT4n6o" 170 | }, 171 | "execution_count": 41, 172 | "outputs": [] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "source": [ 177 | "color_chain = ChatPromptTemplate.from_template(\"You are a color expert. Answer the question about color: {question}\") | model\n", 178 | "fruit_chain = ChatPromptTemplate.from_template(\"You are a fruit expert. 
Answer the question about fruit: {question}\") | model" 179 | ], 180 | "metadata": { 181 | "id": "3C0IOKs44mjz" 182 | }, 183 | "execution_count": 42, 184 | "outputs": [] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "source": [ 189 | "from langchain.schema.runnable import RouterRunnable\n", 190 | "router = RouterRunnable({\"color\": color_chain, \"fruit\": fruit_chain})" 191 | ], 192 | "metadata": { 193 | "id": "EPj6CgAoxP4m" 194 | }, 195 | "execution_count": 43, 196 | "outputs": [] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "source": [ 201 | "chain = {\n", 202 | " \"key\": RunnablePassthrough() | tagger | StdOutputRunnable() | (lambda x: x['text'].name),\n", 203 | " \"input\": {\"question\": RunnablePassthrough()}\n", 204 | "} | router" 205 | ], 206 | "metadata": { 207 | "id": "AsobBwvR5D2M" 208 | }, 209 | "execution_count": 46, 210 | "outputs": [] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "source": [ 215 | "chain.invoke(\"What is the HEX code of color YELLOW?\")" 216 | ], 217 | "metadata": { 218 | "colab": { 219 | "base_uri": "https://localhost:8080/" 220 | }, 221 | "id": "2q3tEt8n5Fsd", 222 | "outputId": "417c29f4-c48a-4827-f367-d587944dc449" 223 | }, 224 | "execution_count": 47, 225 | "outputs": [ 226 | { 227 | "output_type": "stream", 228 | "name": "stdout", 229 | "text": [ 230 | "{'input': 'What is the HEX code of color YELLOW?', 'text': ChainToUse(name='color')}\n" 231 | ] 232 | }, 233 | { 234 | "output_type": "execute_result", 235 | "data": { 236 | "text/plain": [ 237 | "AIMessage(content='The HEX code for the color yellow is #FFFF00.', additional_kwargs={}, example=False)" 238 | ] 239 | }, 240 | "metadata": {}, 241 | "execution_count": 47 242 | } 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "source": [ 248 | "chain.invoke(\"What country grow most apples in 2000?\")" 249 | ], 250 | "metadata": { 251 | "colab": { 252 | "base_uri": "https://localhost:8080/" 253 | }, 254 | "id": "3TMgF_t_5gST", 255 | "outputId": 
"0cf65277-49f8-4838-a6d0-814ca69d41c2" 256 | }, 257 | "execution_count": 48, 258 | "outputs": [ 259 | { 260 | "output_type": "stream", 261 | "name": "stdout", 262 | "text": [ 263 | "{'input': 'What country grow most apples in 2000?', 'text': ChainToUse(name='fruit')}\n" 264 | ] 265 | }, 266 | { 267 | "output_type": "execute_result", 268 | "data": { 269 | "text/plain": [ 270 | "AIMessage(content='In the year 2000, China was the country that grew the most apples.', additional_kwargs={}, example=False)" 271 | ] 272 | }, 273 | "metadata": {}, 274 | "execution_count": 48 275 | } 276 | ] 277 | } 278 | ] 279 | } -------------------------------------------------------------------------------- /expression-language/LangChain_Expression_Language.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyNczSLtaIeZJPFolwCjTueW", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "source": [ 32 | "# LangChain的新特性 - Expression Language\n", 33 | "\n", 34 | "`LangChain Expression Language` 是一种以声明式方法,轻松地将链或组件组合在一起的机制。通过利用管道操作符,构建的任何链将自动具有完整的同步、异步和流式支持。" 35 | ], 36 | "metadata": { 37 | "id": "OHpaChk4b5UN" 38 | } 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "source": [ 43 | "## Python管道 - Pipe\n", 44 | "\n", 45 | "`Python` 的 `Pipe` 提供了管道实现。请参考 [https://github.com/JulienPalard/Pipe](https://github.com/JulienPalard/Pipe)。 来看几个例子" 46 | ], 47 | "metadata": { 48 | "id": "IAzbVA50cVt1" 49 | } 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "source": [ 54 | "### 安装" 55 | ], 
56 | "metadata": { 57 | "id": "BL8UBSkOkAVC" 58 | } 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 25, 63 | "metadata": { 64 | "colab": { 65 | "base_uri": "https://localhost:8080/" 66 | }, 67 | "id": "pzj4WvdHbeRn", 68 | "outputId": "8ccdd25b-9056-4002-9f70-daf8d3d8d8fa" 69 | }, 70 | "outputs": [ 71 | { 72 | "output_type": "stream", 73 | "name": "stdout", 74 | "text": [ 75 | "Requirement already satisfied: pipe in /usr/local/lib/python3.10/dist-packages (2.0)\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "!pip install -U pipe" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "source": [ 86 | "### 最简单一个例子" 87 | ], 88 | "metadata": { 89 | "id": "-cVz9DNDkEYr" 90 | } 91 | }, 92 | { 93 | "cell_type": "code", 94 | "source": [ 95 | "from pipe import select, where\n", 96 | "\n", 97 | "numbers = [1, 2, 3, 4, 5]\n", 98 | "result = list(numbers | where(lambda x: x % 2 == 0) | select(lambda x: x * 2))\n", 99 | "\n", 100 | "result" 101 | ], 102 | "metadata": { 103 | "colab": { 104 | "base_uri": "https://localhost:8080/" 105 | }, 106 | "id": "-9a39Tepgy58", 107 | "outputId": "7119d313-7b21-42d4-ea61-866737ce02b0" 108 | }, 109 | "execution_count": 7, 110 | "outputs": [ 111 | { 112 | "output_type": "execute_result", 113 | "data": { 114 | "text/plain": [ 115 | "[4, 8]" 116 | ] 117 | }, 118 | "metadata": {}, 119 | "execution_count": 7 120 | } 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "source": [ 126 | "### 小小进阶\n", 127 | "\n", 128 | "自定义管道 `uppercase` - 接受一个 `iterable` 参数" 129 | ], 130 | "metadata": { 131 | "id": "X6hYiTkYkJ-L" 132 | } 133 | }, 134 | { 135 | "cell_type": "code", 136 | "source": [ 137 | "from pipe import Pipe\n", 138 | "\n", 139 | "uppercase = Pipe(lambda iterable: (x.upper() for x in iterable))\n", 140 | "\n", 141 | "words = ['red', 'green', 'blue', 'YELLOW']\n", 142 | "\n", 143 | "uppercase_words = list(words | uppercase)\n", 144 | "\n", 145 | "uppercase_words" 146 | ], 147 | "metadata": { 148 | "colab": { 149 | 
"base_uri": "https://localhost:8080/" 150 | }, 151 | "id": "rwhNhNzSkOHC", 152 | "outputId": "6a69c76c-ac24-49bc-e06f-cfb8c7d659d8" 153 | }, 154 | "execution_count": 26, 155 | "outputs": [ 156 | { 157 | "output_type": "execute_result", 158 | "data": { 159 | "text/plain": [ 160 | "['RED', 'GREEN', 'BLUE', 'YELLOW']" 161 | ] 162 | }, 163 | "metadata": {}, 164 | "execution_count": 26 165 | } 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "source": [ 171 | "## LangChain Expression Language与管道\n", 172 | "\n", 173 | "`LEL` 通过管道定义操作序列,帮助程序员以更加优雅简洁的编码方式构建功能逻辑。我们来看看如何通过表达式来重构几个经典的LangChain实例。" 174 | ], 175 | "metadata": { 176 | "id": "3c8vpUz3lSEt" 177 | } 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "source": [ 182 | "### 安装\n", 183 | "\n", 184 | "我们需要安装最新版本的 `langchain` 以确保具有 `LEL` 功能的支持。" 185 | ], 186 | "metadata": { 187 | "id": "bcAYqmcAmLiY" 188 | } 189 | }, 190 | { 191 | "cell_type": "code", 192 | "source": [ 193 | "!pip install -q -U langchain openai" 194 | ], 195 | "metadata": { 196 | "id": "QKFeM3t0mbQQ" 197 | }, 198 | "execution_count": 23, 199 | "outputs": [] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "source": [ 204 | "### 提示词模版与模型\n" 205 | ], 206 | "metadata": { 207 | "id": "dIiNbIYimmSa" 208 | } 209 | }, 210 | { 211 | "cell_type": "code", 212 | "source": [ 213 | "import os\n", 214 | "\n", 215 | "os.environ['OPENAI_API_KEY'] = '您的有效openai api key'" 216 | ], 217 | "metadata": { 218 | "id": "xCEJ_9XEs6jA" 219 | }, 220 | "execution_count": 24, 221 | "outputs": [] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "source": [ 226 | "#### 提示词模板与模型的传统用法" 227 | ], 228 | "metadata": { 229 | "id": "ivCfgUk5rFn6" 230 | } 231 | }, 232 | { 233 | "cell_type": "code", 234 | "source": [ 235 | "from langchain.prompts import (\n", 236 | " ChatPromptTemplate,\n", 237 | " HumanMessagePromptTemplate,\n", 238 | ")\n", 239 | "from langchain.schema import (\n", 240 | " HumanMessage\n", 241 | ")\n", 242 | "from langchain.chains import LLMChain\n", 
243 | "\n", 244 | "human_template=\"Show me the HEX code of color {color_name}\"\n", 245 | "human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)\n", 246 | "\n", 247 | "chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])\n", 248 | "chain = LLMChain(llm=ChatOpenAI(), prompt=chat_prompt)\n", 249 | "\n", 250 | "chain.run(\"RED\")" 251 | ], 252 | "metadata": { 253 | "colab": { 254 | "base_uri": "https://localhost:8080/", 255 | "height": 36 256 | }, 257 | "id": "liG2Oc3ZrHWw", 258 | "outputId": "98b2fbb4-de33-4cbc-b1a7-4427af5f704a" 259 | }, 260 | "execution_count": 27, 261 | "outputs": [ 262 | { 263 | "output_type": "execute_result", 264 | "data": { 265 | "text/plain": [ 266 | "'The HEX code for the color red is #FF0000.'" 267 | ], 268 | "application/vnd.google.colaboratory.intrinsic+json": { 269 | "type": "string" 270 | } 271 | }, 272 | "metadata": {}, 273 | "execution_count": 27 274 | } 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "source": [ 280 | "#### 通过 `LEL` 连接提示词模板与模型" 281 | ], 282 | "metadata": { 283 | "id": "skH8OpgIrKP7" 284 | } 285 | }, 286 | { 287 | "cell_type": "code", 288 | "source": [ 289 | "from langchain.prompts import ChatPromptTemplate\n", 290 | "from langchain.chat_models import ChatOpenAI\n", 291 | "\n", 292 | "model = ChatOpenAI()\n", 293 | "prompt = ChatPromptTemplate.from_template(\"Show me the HEX code of color {color_name}\")\n", 294 | "\n", 295 | "chain = prompt | model\n", 296 | "\n", 297 | "chain.invoke({\"color_name\": \"RED\"})" 298 | ], 299 | "metadata": { 300 | "colab": { 301 | "base_uri": "https://localhost:8080/" 302 | }, 303 | "id": "Ml1opV6bmq9z", 304 | "outputId": "b05b45af-5f6f-4f9a-f6ec-c1f40e9cee46" 305 | }, 306 | "execution_count": 34, 307 | "outputs": [ 308 | { 309 | "output_type": "execute_result", 310 | "data": { 311 | "text/plain": [ 312 | "AIMessage(content='The HEX code for the color red is #FF0000.', additional_kwargs={}, example=False)" 313 | ] 314 | }, 315 
| "metadata": {}, 316 | "execution_count": 34 317 | } 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "source": [ 323 | "### 一个稍稍复杂的例子\n", 324 | "\n", 325 | "现在我们给刚才搭建的管道追加一些环节" 326 | ], 327 | "metadata": { 328 | "id": "XuKN7l61uTRO" 329 | } 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "source": [ 334 | "#### 添加标准输出解析" 335 | ], 336 | "metadata": { 337 | "id": "TBJtYNrMwk8k" 338 | } 339 | }, 340 | { 341 | "cell_type": "code", 342 | "source": [ 343 | "from langchain.schema.output_parser import StrOutputParser\n", 344 | "\n", 345 | "chain = prompt | model | StrOutputParser()\n", 346 | "\n", 347 | "chain.invoke({\"color_name\": \"RED\"})" 348 | ], 349 | "metadata": { 350 | "colab": { 351 | "base_uri": "https://localhost:8080/", 352 | "height": 36 353 | }, 354 | "id": "eEDRWAYxuaIj", 355 | "outputId": "814a655d-6aca-44f2-ca96-3b7e48882e33" 356 | }, 357 | "execution_count": 35, 358 | "outputs": [ 359 | { 360 | "output_type": "execute_result", 361 | "data": { 362 | "text/plain": [ 363 | "'The HEX code of the color red is #FF0000.'" 364 | ], 365 | "application/vnd.google.colaboratory.intrinsic+json": { 366 | "type": "string" 367 | } 368 | }, 369 | "metadata": {}, 370 | "execution_count": 35 371 | } 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "source": [ 377 | "#### 添加函数调用\n", 378 | "\n", 379 | "我们来给管道中的模型添加一些函数调用。注,我们并不真正调用函数,只解析出函数调用的数据。\n", 380 | "\n", 381 | "`JsonOutputFunctionsParser` 用来将函数调用的回复解析为JSON格式,请参考[API 文档](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.openai_functions.JsonOutputFunctionsParser.html)" 382 | ], 383 | "metadata": { 384 | "id": "y2YheMkRxGrg" 385 | } 386 | }, 387 | { 388 | "cell_type": "code", 389 | "source": [ 390 | "from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser\n", 391 | "\n", 392 | "functions = [\n", 393 | " {\n", 394 | " \"name\": \"save_color_code\",\n", 395 | " \"description\": \"Save the HEX code of color and its name\",\n", 
396 | " \"parameters\": {\n", 397 | " \"type\": \"object\",\n", 398 | " \"properties\": {\n", 399 | " \"hex_code\": {\n", 400 | " \"type\": \"string\",\n", 401 | " \"description\": \"The HEX code of the color\"\n", 402 | " },\n", 403 | " \"color\": {\n", 404 | " \"type\": \"string\",\n", 405 | " \"description\": \"The color name\"\n", 406 | " }\n", 407 | " },\n", 408 | " \"required\": [\"hex_code\", \"color\"]\n", 409 | " }\n", 410 | " }\n", 411 | " ]\n", 412 | "chain = prompt | model.bind(function_call = {\"name\": \"save_color_code\"}, functions = functions) | JsonOutputFunctionsParser()\n", 413 | "\n", 414 | "chain.invoke({\"color_name\": \"RED\"})" 415 | ], 416 | "metadata": { 417 | "colab": { 418 | "base_uri": "https://localhost:8080/" 419 | }, 420 | "id": "obpu_uLAyGS5", 421 | "outputId": "c8c45e4f-c159-4fe3-fd00-9d21b3b55f46" 422 | }, 423 | "execution_count": 36, 424 | "outputs": [ 425 | { 426 | "output_type": "execute_result", 427 | "data": { 428 | "text/plain": [ 429 | "{'hex_code': '#FF0000', 'color': 'RED'}" 430 | ] 431 | }, 432 | "metadata": {}, 433 | "execution_count": 36 434 | } 435 | ] 436 | } 437 | ] 438 | } -------------------------------------------------------------------------------- /expression-language/LangChain_Expression_Language_Runnable.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyOp1YZkXUo1Nss+Mi/VxTsF", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "source": [ 32 | "# LangChain Expression Language 中最重要的接口 
Runnable\n", 33 | "\n", 34 | "`LEL` ( `LangChain Expression Language` ) 是一种以声明式方法,轻松地将链或组件组合在一起的机制。今天我们来介绍 `LEL` 中最重要的接口 `Runnable`。" 35 | ], 36 | "metadata": { 37 | "id": "OHpaChk4b5UN" 38 | } 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "source": [ 43 | "## Runnable\n", 44 | "\n", 45 | "`LangChain` 定义了 `Runnable` 接口,绝大多数组件也实现了该接口。`Runnable` 接口定义了如下函数:\n", 46 | "\n", 47 | "- stream: 流式输出响应\n", 48 | "- invoke: 基于单一输入调用链\n", 49 | "- batch: 基于一组输入调用链\n", 50 | "- astream\n", 51 | "- ainvoke\n", 52 | "- abatch\n", 53 | "\n", 54 | "后三个函数为前三个函数的异步版本。更多内容请参考 [Expression Language Interface](https://python.langchain.com/docs/guides/expression_language/interface)。\n", 55 | "\n", 56 | "`ChatPromptTemplate` 与 `ChatOpenAI` 类都实现了 `Runnable` 接口。参考如下代码:\n" 57 | ], 58 | "metadata": { 59 | "id": "IAzbVA50cVt1" 60 | } 61 | }, 62 | { 63 | "cell_type": "code", 64 | "source": [ 65 | "!pip install -q -U langchain\n", 66 | "\n", 67 | "from langchain.prompts import ChatPromptTemplate, Base\n", 68 | "from langchain.schema.runnable import Runnable\n", 69 | "prompt = ChatPromptTemplate.from_template(\"Hi, LEL!\")\n", 70 | "print(isinstance(prompt, Runnable))" 71 | ], 72 | "metadata": { 73 | "colab": { 74 | "base_uri": "https://localhost:8080/" 75 | }, 76 | "id": "MZWl8x3dmjsV", 77 | "outputId": "0ce4eef3-a06c-4647-f1ab-618440503839" 78 | }, 79 | "execution_count": 1, 80 | "outputs": [ 81 | { 82 | "output_type": "stream", 83 | "name": "stdout", 84 | "text": [ 85 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 86 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.0/90.0 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 87 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 88 | "\u001b[?25hTrue\n" 89 
| ] 90 | } 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "source": [ 96 | "### RunnablePassthrough 在管道中实现输入的传递" 97 | ], 98 | "metadata": { 99 | "id": "So9PZ9SigdjN" 100 | } 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "source": [ 105 | "在通过管道构建LangChain链时,我们可能需要将原始输入变量传递给链式模型的后续步骤。我们可以使用类 `RunnablePassthrough` 来达到输入传递的目的。请参考以下示例:" 106 | ], 107 | "metadata": { 108 | "id": "aOmBTf29hT15" 109 | } 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "source": [ 114 | "`RunnablePassthrough` 接受输入,并如实地将输入作为自己的输出,从而达到传递的目的。这里我们实现一个 `Runnable` 来接受一个 `Dict` 类型的输入,并在控制台打印出键为 `name` 的值,以此来测试 `RunnablePassthrough` 的传递效果。\n", 115 | "\n", 116 | "在构成的链中,第一个 `RunnablePassthrough` 传递的是 `chain.invoke(\"Simon\")` 中的字符串参数 `Simon`,第二个 `RunnablePassthrough` 传递的是管道第一部分的输出,一个字典 dict:\n", 117 | "\n", 118 | "```json\n", 119 | "{\n", 120 | " \"name\": \"Simon\"\n", 121 | "}\n", 122 | "```" 123 | ], 124 | "metadata": { 125 | "id": "88VQ51ARhshM" 126 | } 127 | }, 128 | { 129 | "cell_type": "code", 130 | "source": [ 131 | "from langchain.schema.runnable import RunnablePassthrough, RunnableConfig, Input\n", 132 | "from langchain.load.serializable import Serializable\n", 133 | "from typing import Optional, Dict\n", 134 | "\n", 135 | "class StdOutputRunnable(Serializable, Runnable[Input, Input]):\n", 136 | " @property\n", 137 | " def lc_serializable(self) -> bool:\n", 138 | " return True\n", 139 | "\n", 140 | " def invoke(self, input: Dict, config: Optional[RunnableConfig] = None) -> Input:\n", 141 | " print(f\"Hey, I received the name {input['name']}\")\n", 142 | " return self._call_with_config(lambda x: x, input, config)\n", 143 | "\n", 144 | "chain = {\"name\": RunnablePassthrough()} | RunnablePassthrough() | StdOutputRunnable()\n", 145 | "\n", 146 | "chain.invoke(\"Simon\")" 147 | ], 148 | "metadata": { 149 | "colab": { 150 | "base_uri": "https://localhost:8080/" 151 | }, 152 | "id": "XsOMu9w5dV0-", 153 | "outputId": "52269096-4e50-4fcf-a042-9ef78f1d2fd0" 154 | }, 155 | 
"execution_count": 46, 156 | "outputs": [ 157 | { 158 | "output_type": "stream", 159 | "name": "stdout", 160 | "text": [ 161 | "Hey, I received the name Simon\n" 162 | ] 163 | }, 164 | { 165 | "output_type": "execute_result", 166 | "data": { 167 | "text/plain": [ 168 | "{'name': 'Simon'}" 169 | ] 170 | }, 171 | "metadata": {}, 172 | "execution_count": 46 173 | } 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "source": [ 179 | "### itemgetter 实现输入的部分传递\n", 180 | "\n", 181 | "我们可以不需要传递输入的完整数据。当我们想要传递字典中的某一个键值,可以通过 `itemgetter` 函数实现。请参考如下代码:" 182 | ], 183 | "metadata": { 184 | "id": "qG3fJp0Si-iX" 185 | } 186 | }, 187 | { 188 | "cell_type": "code", 189 | "source": [ 190 | "from langchain.schema import StrOutputParser\n", 191 | "from langchain.schema.runnable import RunnablePassthrough, RunnableConfig, Input\n", 192 | "from langchain.load.serializable import Serializable\n", 193 | "from operator import itemgetter\n", 194 | "\n", 195 | "chain = {\"name\": itemgetter(\"user_name\") } | RunnablePassthrough() | StdOutputRunnable()\n", 196 | "\n", 197 | "chain.invoke({\"user_name\": \"Alex\"})" 198 | ], 199 | "metadata": { 200 | "colab": { 201 | "base_uri": "https://localhost:8080/" 202 | }, 203 | "id": "bVJPSSOqjckz", 204 | "outputId": "8bb676ac-1724-4bad-912e-0eb3d9b7b850" 205 | }, 206 | "execution_count": 47, 207 | "outputs": [ 208 | { 209 | "output_type": "stream", 210 | "name": "stdout", 211 | "text": [ 212 | "Hey, I received the name Alex\n" 213 | ] 214 | }, 215 | { 216 | "output_type": "execute_result", 217 | "data": { 218 | "text/plain": [ 219 | "{'name': 'Alex'}" 220 | ] 221 | }, 222 | "metadata": {}, 223 | "execution_count": 47 224 | } 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "source": [ 230 | "### 连接模型的一个完整例子\n", 231 | "\n", 232 | "1. 利用 `Retriever` 增加外部数据获取能力\n", 233 | "2. 
管道连接 `OpenAI` 的聊天模型完成问答" 234 | ], 235 | "metadata": { 236 | "id": "mzD5Emcsp9rq" 237 | } 238 | }, 239 | { 240 | "cell_type": "code", 241 | "source": [ 242 | "!pip install -q -U openai chromadb tiktoken" 243 | ], 244 | "metadata": { 245 | "colab": { 246 | "base_uri": "https://localhost:8080/" 247 | }, 248 | "id": "r9XU7NXLrERq", 249 | "outputId": "8564e35b-a7b1-4acb-f9fc-3992153e3473" 250 | }, 251 | "execution_count": 27, 252 | "outputs": [ 253 | { 254 | "output_type": "stream", 255 | "name": "stdout", 256 | "text": [ 257 | "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/1.7 MB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.1/1.7 MB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.7/1.7 MB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.7 MB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 258 | "\u001b[?25h" 259 | ] 260 | } 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "source": [ 266 | "import os\n", 267 | "\n", 268 | "os.environ['OPENAI_API_KEY'] = '您的有效openai api key'" 269 | ], 270 | "metadata": { 271 | "id": "cpayrV5RrTBT" 272 | }, 273 | "execution_count": 28, 274 | "outputs": [] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "source": [ 279 | "from langchain.vectorstores import Chroma\n", 280 | "from langchain.vectorstores.base import VectorStoreRetriever\n", 281 | "from langchain.embeddings import 
OpenAIEmbeddings\n", 282 | "from langchain.schema.runnable import RunnablePassthrough\n", 283 | "from langchain.chat_models import ChatOpenAI\n", 284 | "\n", 285 | "vectorstore = Chroma.from_texts([\"My name is VerySmallWoods, a software engineer based in Dublin.\"], embedding=OpenAIEmbeddings())\n", 286 | "retriever = vectorstore.as_retriever()" 287 | ], 288 | "metadata": { 289 | "id": "D4uoeGtVrMhI" 290 | }, 291 | "execution_count": 48, 292 | "outputs": [] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "source": [ 297 | "retriever.__class__" 298 | ], 299 | "metadata": { 300 | "colab": { 301 | "base_uri": "https://localhost:8080/" 302 | }, 303 | "id": "_LoUPnlErxW2", 304 | "outputId": "c8ece014-1c08-4a85-a4fe-49df36922653" 305 | }, 306 | "execution_count": 49, 307 | "outputs": [ 308 | { 309 | "output_type": "execute_result", 310 | "data": { 311 | "text/plain": [ 312 | "langchain.vectorstores.base.VectorStoreRetriever" 313 | ] 314 | }, 315 | "metadata": {}, 316 | "execution_count": 49 317 | } 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "source": [ 323 | "`VectorStoreRetriever` 的 `BaseRetriever` 基类实现了 `invoke` 函数。它接受字符串类型输入,并调用 `get_relevant_documents` 函数查询相关文档。\n", 324 | "\n", 325 | "```python\n", 326 | "class BaseRetriever(Serializable, Runnable[str, List[Document]], ABC):\n", 327 | " # ......\n", 328 | " def invoke(\n", 329 | " self, input: str, config: Optional[RunnableConfig] = None\n", 330 | " ) -> List[Document]:\n", 331 | " return self.get_relevant_documents(input, **(config or {}))\n", 332 | "```" 333 | ], 334 | "metadata": { 335 | "id": "i5S_fCX0slX1" 336 | } 337 | }, 338 | { 339 | "cell_type": "code", 340 | "source": [ 341 | "model = ChatOpenAI()\n", 342 | "template = \"\"\"Answer the question based only on the following context:\n", 343 | "{context}\n", 344 | "\n", 345 | "Question: {question}\n", 346 | "\"\"\"\n", 347 | "prompt = ChatPromptTemplate.from_template(template)\n", 348 | "\n", 349 | "chain = ( {\n", 350 | " \"context\": 
retriever,\n", 351 | " \"question\": RunnablePassthrough()\n", 352 | "} | prompt | model | StrOutputParser() )\n", 353 | "\n", 354 | "chain.invoke(\"Who am I?\")\n" 355 | ], 356 | "metadata": { 357 | "colab": { 358 | "base_uri": "https://localhost:8080/", 359 | "height": 36 360 | }, 361 | "id": "RqhRpOnBqCua", 362 | "outputId": "8c65d325-3321-46c0-a590-3997c4538816" 363 | }, 364 | "execution_count": 50, 365 | "outputs": [ 366 | { 367 | "output_type": "execute_result", 368 | "data": { 369 | "text/plain": [ 370 | "'You are VerySmallWoods, a software engineer based in Dublin.'" 371 | ], 372 | "application/vnd.google.colaboratory.intrinsic+json": { 373 | "type": "string" 374 | } 375 | }, 376 | "metadata": {}, 377 | "execution_count": 50 378 | } 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "source": [ 384 | "chain.invoke(\"Where do I live?\")" 385 | ], 386 | "metadata": { 387 | "colab": { 388 | "base_uri": "https://localhost:8080/", 389 | "height": 36 390 | }, 391 | "id": "HDxSyFCbtXSG", 392 | "outputId": "1a8be89a-300d-4f13-bda8-e7fc76284aef" 393 | }, 394 | "execution_count": 51, 395 | "outputs": [ 396 | { 397 | "output_type": "execute_result", 398 | "data": { 399 | "text/plain": [ 400 | "'Based on the given context, you live in Dublin.'" 401 | ], 402 | "application/vnd.google.colaboratory.intrinsic+json": { 403 | "type": "string" 404 | } 405 | }, 406 | "metadata": {}, 407 | "execution_count": 51 408 | } 409 | ] 410 | } 411 | ] 412 | } -------------------------------------------------------------------------------- /langchain_firecrawl.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyMCL58YEvwYygGhznz8AUrd", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | 
}, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": { 33 | "colab": { 34 | "base_uri": "https://localhost:8080/" 35 | }, 36 | "id": "OyTrynJbYbIZ", 37 | "outputId": "60f995fd-969d-42d9-c88a-67d1cf244bc6" 38 | }, 39 | "outputs": [ 40 | { 41 | "output_type": "stream", 42 | "name": "stdout", 43 | "text": [ 44 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m311.6/311.6 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 45 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m35.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 46 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 47 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 48 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 49 | "\u001b[?25h" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "!pip install langchain langchain_openai firecrawl-py faiss-cpu -q -U" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "source": [ 60 | "from langchain_community.document_loaders import FireCrawlLoader" 61 | ], 62 | "metadata": { 63 | "id": "4rahZqw7YkTj" 64 | }, 65 | "execution_count": 4, 66 | "outputs": [] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "source": [ 71 | "from langchain_community.vectorstores import FAISS\n", 72 | "from langchain_openai import OpenAIEmbeddings\n", 73 | "from langchain_text_splitters import CharacterTextSplitter\n", 74 | 
"from langchain_community.document_loaders import TextLoader\n", 75 | "from google.colab import userdata\n", 76 | "\n", 77 | "def build_vector_retriever(firecrawl_api_key, scrape_url):\n", 78 | " loader = FireCrawlLoader(api_key=firecrawl_api_key, url=scrape_url, mode=\"scrape\")\n", 79 | " documents = loader.load()\n", 80 | " text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", 81 | " docs = text_splitter.split_documents(documents)\n", 82 | " embeddings = OpenAIEmbeddings(api_key=userdata.get(\"OPENAI_API_KEY\"))\n", 83 | " db = FAISS.from_documents(docs, embeddings)\n", 84 | "\n", 85 | " return db.as_retriever()" 86 | ], 87 | "metadata": { 88 | "id": "BHLCtjZhYwvb" 89 | }, 90 | "execution_count": 20, 91 | "outputs": [] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "source": [ 96 | "retriever = build_vector_retriever(userdata.get(\"FIRECRAWL_API_KEY\"), \"https://stablediffusionapi.com/docs/stable-diffusion-api/img2img\")" 97 | ], 98 | "metadata": { 99 | "colab": { 100 | "base_uri": "https://localhost:8080/" 101 | }, 102 | "id": "sALR6PqCY5lV", 103 | "outputId": "aad4f2cb-8abb-40b8-b994-41ec5d634fae" 104 | }, 105 | "execution_count": 21, 106 | "outputs": [ 107 | { 108 | "output_type": "stream", 109 | "name": "stderr", 110 | "text": [ 111 | "WARNING:langchain_text_splitters.base:Created a chunk of size 1419, which is longer than the specified 1000\n", 112 | "WARNING:langchain_text_splitters.base:Created a chunk of size 1462, which is longer than the specified 1000\n", 113 | "WARNING:langchain_text_splitters.base:Created a chunk of size 1067, which is longer than the specified 1000\n", 114 | "WARNING:langchain_text_splitters.base:Created a chunk of size 1391, which is longer than the specified 1000\n" 115 | ] 116 | } 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "source": [ 122 | "from langchain_core.runnables import RunnableParallel, RunnablePassthrough\n", 123 | "from langchain_core.prompts import ChatPromptTemplate\n", 124 | 
"from langchain_core.output_parsers import StrOutputParser\n", 125 | "from langchain_openai import ChatOpenAI\n", 126 | "\n", 127 | "def build_qa_chain(retriever):\n", 128 | " prompt = ChatPromptTemplate.from_template(\"Using the context below to answer user's question. If you can't find information within the context, simply answer I don't know.\\n\\n {context} {question}\")\n", 129 | " model = ChatOpenAI(api_key=userdata.get('OPENAI_API_KEY'))\n", 130 | "\n", 131 | " chain = {\"context\": retriever, \"question\": RunnablePassthrough()} | prompt | model | StrOutputParser()\n", 132 | "\n", 133 | " return chain" 134 | ], 135 | "metadata": { 136 | "id": "i4T4Kj7HZZNc" 137 | }, 138 | "execution_count": 22, 139 | "outputs": [] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "source": [ 144 | "chain = build_qa_chain(retriever=retriever)\n", 145 | "\n", 146 | "chain.invoke(\"How to use img2img API in Python\")" 147 | ], 148 | "metadata": { 149 | "colab": { 150 | "base_uri": "https://localhost:8080/", 151 | "height": 71 152 | }, 153 | "id": "umnGP2bcaeOx", 154 | "outputId": "7bed7447-67e4-485d-85bf-6d86f7286abd" 155 | }, 156 | "execution_count": 23, 157 | "outputs": [ 158 | { 159 | "output_type": "execute_result", 160 | "data": { 161 | "text/plain": [ 162 | "'To use the img2img API in Python, you can make a POST request to the specified endpoint with the required parameters like prompt, negative prompt, initial image, width, height, and other attributes as shown in the provided code snippet. 
Make sure to import the necessary libraries like requests and json, then create a payload with the required parameters, and send a POST request to the API endpoint.'" 163 | ], 164 | "application/vnd.google.colaboratory.intrinsic+json": { 165 | "type": "string" 166 | } 167 | }, 168 | "metadata": {}, 169 | "execution_count": 23 170 | } 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "source": [], 176 | "metadata": { 177 | "id": "YoV-D9pmdUhN" 178 | }, 179 | "execution_count": null, 180 | "outputs": [] 181 | } 182 | ] 183 | } -------------------------------------------------------------------------------- /langchain_nomic_embedding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyNXBc0ZTxbtr63I2LiiPsTA", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "id": "1O1vgh-veZzX" 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "!pip install -q -U langchain chromadb tiktoken langchain-nomic langchain-openai" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "source": [ 43 | "POST_URL=\"https://blog.nomic.ai/posts/nomic-embed-text-v1\"" 44 | ], 45 | "metadata": { 46 | "id": "LCA0Sqote-__" 47 | }, 48 | "execution_count": null, 49 | "outputs": [] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "source": [ 54 | "from langchain_community.document_loaders import WebBaseLoader\n", 55 | "\n", 56 | "docs = WebBaseLoader(POST_URL).load()" 57 | ], 58 | "metadata": { 59 | "id": "AUCuhl1NfA2V" 60 | }, 61 | 
"execution_count": null, 62 | "outputs": [] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "source": [ 67 | "len(docs)" 68 | ], 69 | "metadata": { 70 | "id": "aXUyW9stfWav" 71 | }, 72 | "execution_count": null, 73 | "outputs": [] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "source": [ 78 | "from langchain.text_splitter import CharacterTextSplitter\n", 79 | "\n", 80 | "text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n", 81 | " chunk_size=7500, chunk_overlap=100\n", 82 | ")\n", 83 | "doc_splits = text_splitter.split_documents(docs)" 84 | ], 85 | "metadata": { 86 | "id": "67ozgMXqfXIt" 87 | }, 88 | "execution_count": null, 89 | "outputs": [] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "source": [ 94 | "import os\n", 95 | "from google.colab import userdata\n", 96 | "\n", 97 | "os.environ['NOMIC_API_KEY'] = userdata.get('NOMIC_API_KEY')\n", 98 | "\n", 99 | "from langchain_community.vectorstores import Chroma\n", 100 | "from langchain_core.output_parsers import StrOutputParser\n", 101 | "from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n", 102 | "from langchain_nomic import NomicEmbeddings\n", 103 | "from langchain_nomic.embeddings import NomicEmbeddings" 104 | ], 105 | "metadata": { 106 | "id": "bgjXz_GwgV7A" 107 | }, 108 | "execution_count": null, 109 | "outputs": [] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "source": [ 114 | "vectorstore = Chroma.from_documents(\n", 115 | " documents=doc_splits,\n", 116 | " embedding=NomicEmbeddings(model=\"nomic-embed-text-v1\"),\n", 117 | ")\n", 118 | "retriever = vectorstore.as_retriever()" 119 | ], 120 | "metadata": { 121 | "id": "hBvJESZEgAH1" 122 | }, 123 | "execution_count": null, 124 | "outputs": [] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "source": [ 129 | "os.environ['OPENAI_API_KEY'] = userdata.get(\"OPENAI_API_KEY\")" 130 | ], 131 | "metadata": { 132 | "id": "cjSzc74xhKHU" 133 | }, 134 | "execution_count": null, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": 
"code", 139 | "source": [ 140 | "from langchain_community.chat_models import ChatOllama\n", 141 | "from langchain_core.prompts import ChatPromptTemplate\n", 142 | "from langchain_openai import ChatOpenAI\n", 143 | "\n", 144 | "# Prompt\n", 145 | "template = \"\"\"Answer the question based only on the following context:\n", 146 | "{context}\n", 147 | "\n", 148 | "Question: {question}\n", 149 | "\"\"\"\n", 150 | "prompt = ChatPromptTemplate.from_template(template)\n", 151 | "\n", 152 | "chain = (\n", 153 | " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", 154 | " | prompt\n", 155 | " | ChatOpenAI(temperature=0, model=\"gpt-4-1106-preview\")\n", 156 | " | StrOutputParser()\n", 157 | ")\n" 158 | ], 159 | "metadata": { 160 | "id": "dMjjps1WgyGH" 161 | }, 162 | "execution_count": null, 163 | "outputs": [] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "source": [ 168 | "response = chain.invoke(\"How did Nomic Embed get trained?\")" 169 | ], 170 | "metadata": { 171 | "id": "ADYQ7q-yhhDO" 172 | }, 173 | "execution_count": null, 174 | "outputs": [] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "source": [ 179 | "response" 180 | ], 181 | "metadata": { 182 | "colab": { 183 | "base_uri": "https://localhost:8080/", 184 | "height": 183 185 | }, 186 | "id": "mdasttaxhs3s", 187 | "outputId": "cbe76cbe-0b55-4d94-e7d1-1f63af7b71cd" 188 | }, 189 | "execution_count": 18, 190 | "outputs": [ 191 | { 192 | "output_type": "execute_result", 193 | "data": { 194 | "text/plain": [ 195 | "\"Nomic Embed was trained using a multi-stage contrastive learning pipeline. The process began with a BERT initialization, specifically training their own BERT model with a 2048 token context length, named nomic-bert-2048. 
This model incorporated several modifications inspired by MosaicBERT, such as using Rotary Position Embeddings for context length extrapolation, employing SwiGLU activations for improved performance, setting dropout to 0, and implementing various training optimizations like using Deepspeed and FlashAttention, training in BF16 precision, increasing the vocab size to a multiple of 64, training with a large batch size of 4096, and masking at a 30% rate during masked language modeling.\\n\\nAfter establishing the nomic-bert-2048, the next phase involved contrastive training with a dataset composed of approximately 235 million text pairs. This dataset was extensively validated for quality during collection with Nomic Atlas. The details of the dataset can be found in the nomic-ai/contrastors codebase, and a subset of 5 million pairs is available for exploration in Nomic Atlas.\\n\\nThe contrastive training aimed to fine-tune the model on high-quality labeled datasets, leveraging search queries and answers from web searches, and employing data curation and hard-example mining techniques. 
The model's performance was evaluated on benchmarks like the GLUE benchmark, the Massive Text Embedding Benchmark (MTEB), the LoCo Benchmark, and the Jina Long Context Benchmark, where it demonstrated superior performance compared to other models like OpenAI's text-embedding-ada-002 and jina-embeddings-v2-base-en on various tasks.\"" 196 | ], 197 | "application/vnd.google.colaboratory.intrinsic+json": { 198 | "type": "string" 199 | } 200 | }, 201 | "metadata": {}, 202 | "execution_count": 18 203 | } 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "source": [], 209 | "metadata": { 210 | "id": "7xqm1DsjhtNv" 211 | }, 212 | "execution_count": null, 213 | "outputs": [] 214 | } 215 | ] 216 | } -------------------------------------------------------------------------------- /langchain_openai_gpt4o.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyP+7jG/IeRkyerZlQRrt1Ut", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": { 33 | "id": "0hbGWIPliFVc" 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "!pip install langchain_core langchain_openai -q -U" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "source": [ 43 | "from langchain_openai import ChatOpenAI\n", 44 | "from langchain_core.messages import HumanMessage" 45 | ], 46 | "metadata": { 47 | "id": "x-pHksddjSM6" 48 | }, 49 | "execution_count": 5, 50 | "outputs": [] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "source": [ 55 | "from google.colab import 
userdata\n", 56 | "OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')\n", 57 | "MODEL_NAME = 'gpt-4o'" 58 | ], 59 | "metadata": { 60 | "id": "CkG2HdO9jsel" 61 | }, 62 | "execution_count": 19, 63 | "outputs": [] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "source": [ 68 | "llm = ChatOpenAI(model=MODEL_NAME, api_key=OPENAI_API_KEY)" 69 | ], 70 | "metadata": { 71 | "id": "lllcnOQSj2ta" 72 | }, 73 | "execution_count": 20, 74 | "outputs": [] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "source": [ 79 | "response = llm.invoke(\"What can you do?\")" 80 | ], 81 | "metadata": { 82 | "id": "_sifbHN7j-yz" 83 | }, 84 | "execution_count": 30, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "source": [ 90 | "print(response.content)" 91 | ], 92 | "metadata": { 93 | "colab": { 94 | "base_uri": "https://localhost:8080/" 95 | }, 96 | "id": "AN7Ht66CkRSU", 97 | "outputId": "063f598f-25f1-4e3e-9541-9790ef913ccb" 98 | }, 99 | "execution_count": 31, 100 | "outputs": [ 101 | { 102 | "output_type": "stream", 103 | "name": "stdout", 104 | "text": [ 105 | "As an AI developed by OpenAI, I can assist you in a variety of tasks, including but not limited to:\n", 106 | "\n", 107 | "1. **Answering Questions**: I can provide answers to questions across a wide range of topics, from science, technology, and mathematics to literature, history, and general knowledge.\n", 108 | "\n", 109 | "2. **Educational Support**: I can help explain complex concepts, solve problems, and offer detailed explanations in areas such as mathematics, physics, and programming.\n", 110 | "\n", 111 | "3. **Writing and Editing Assistance**: I can help you write essays, reports, emails, and more. I can also assist with editing and improving your written content.\n", 112 | "\n", 113 | "4. **Programming Help**: I can help you understand programming concepts, debug code, or learn different programming languages like Python, Java, and more.\n", 114 | "\n", 115 | "5. 
**Data Analysis**: I can guide you through basic data analysis, explain statistical concepts, and help you understand machine learning principles.\n", 116 | "\n", 117 | "6. **Creative Writing**: I can assist in generating ideas, writing stories, poems, and even scripts.\n", 118 | "\n", 119 | "7. **Translation**: I can translate text between various languages, although for highly accurate or nuanced translations, especially for less common languages, professional human translation might be preferable.\n", 120 | "\n", 121 | "8. **Health and Fitness Advice**: While I can provide general information about health and fitness, it's important to consult with a professional for personalized advice.\n", 122 | "\n", 123 | "9. **Entertainment Recommendations**: I can suggest books, movies, music, games, and other forms of entertainment based on your interests.\n", 124 | "\n", 125 | "10. **Travel and Geography**: I can provide travel advice, including details about destinations, travel tips, and cultural information.\n", 126 | "\n", 127 | "11. **Cooking and Recipes**: I can help you find recipes, explain cooking techniques, and provide tips on food preparation.\n", 128 | "\n", 129 | "12. 
**Personal Finance Advice**: I can offer general advice and information on topics like budgeting, saving, and investing, but for personalized financial advice, consulting with a professional is recommended.\n", 130 | "\n", 131 | "I'm here to help with a broad spectrum of inquiries and tasks, leveraging a large database of knowledge that can be useful in many different scenarios.\n" 132 | ] 133 | } 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "source": [ 139 | "human_message = HumanMessage(content=[\n", 140 | " { \"type\": \"text\", \"text\": \"Solve all the math equations in the image\" },\n", 141 | " { \"type\": \"image_url\", \"image_url\": { \"url\": \"https://mathswithdavid.files.wordpress.com/2021/01/image-157.png?w=1024\" }}\n", 142 | "])\n", 143 | "response = llm.invoke([ human_message ])" 144 | ], 145 | "metadata": { 146 | "id": "UGFV8HxdmJ_S" 147 | }, 148 | "execution_count": 33, 149 | "outputs": [] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "source": [ 154 | "print(response.content)" 155 | ], 156 | "metadata": { 157 | "colab": { 158 | "base_uri": "https://localhost:8080/" 159 | }, 160 | "id": "sPZb9mrdmpwx", 161 | "outputId": "8843e55f-82d9-4b3d-f615-b3fd5f4fbf63" 162 | }, 163 | "execution_count": 35, 164 | "outputs": [ 165 | { 166 | "output_type": "stream", 167 | "name": "stdout", 168 | "text": [ 169 | "Let's solve each equation step-by-step:\n", 170 | "\n", 171 | "**(a) 2x + 3 = 9**\n", 172 | "\\[ 2x = 9 - 3 \\]\n", 173 | "\\[ 2x = 6 \\]\n", 174 | "\\[ x = 3 \\]\n", 175 | "\n", 176 | "**(b) 3w - 1 = 14**\n", 177 | "\\[ 3w = 14 + 1 \\]\n", 178 | "\\[ 3w = 15 \\]\n", 179 | "\\[ w = 5 \\]\n", 180 | "\n", 181 | "**(c) 7y + 2 = 30**\n", 182 | "\\[ 7y = 30 - 2 \\]\n", 183 | "\\[ 7y = 28 \\]\n", 184 | "\\[ y = 4 \\]\n", 185 | "\n", 186 | "**(d) 5x + 20 = 35**\n", 187 | "\\[ 5x = 35 - 20 \\]\n", 188 | "\\[ 5x = 15 \\]\n", 189 | "\\[ x = 3 \\]\n", 190 | "\n", 191 | "**(e) 6c - 12 = 48**\n", 192 | "\\[ 6c = 48 + 12 \\]\n", 193 | "\\[ 6c = 60 
\\]\n", 194 | "\\[ c = 10 \\]\n", 195 | "\n", 196 | "**(f) 8m - 4 = 20**\n", 197 | "\\[ 8m = 20 + 4 \\]\n", 198 | "\\[ 8m = 24 \\]\n", 199 | "\\[ m = 3 \\]\n", 200 | "\n", 201 | "**(g) 7w + 13 = 90**\n", 202 | "\\[ 7w = 90 - 13 \\]\n", 203 | "\\[ 7w = 77 \\]\n", 204 | "\\[ w = 11 \\]\n", 205 | "\n", 206 | "**(h) 12p - 18 = 30**\n", 207 | "\\[ 12p = 30 + 18 \\]\n", 208 | "\\[ 12p = 48 \\]\n", 209 | "\\[ p = 4 \\]\n", 210 | "\n", 211 | "**(i) 9w - 5 = 67**\n", 212 | "\\[ 9w = 67 + 5 \\]\n", 213 | "\\[ 9w = 72 \\]\n", 214 | "\\[ w = 8 \\]\n", 215 | "\n", 216 | "**(j) 10a + 40 = 100**\n", 217 | "\\[ 10a = 100 - 40 \\]\n", 218 | "\\[ 10a = 60 \\]\n", 219 | "\\[ a = 6 \\]\n", 220 | "\n", 221 | "**(k) 9x - 24 = 84**\n", 222 | "\\[ 9x = 84 + 24 \\]\n", 223 | "\\[ 9x = 108 \\]\n", 224 | "\\[ x = 12 \\]\n", 225 | "\n", 226 | "**(l) 7w + 1 = 1**\n", 227 | "\\[ 7w = 1 - 1 \\]\n", 228 | "\\[ 7w = 0 \\]\n", 229 | "\\[ w = 0 \\]\n", 230 | "\n", 231 | "**(m) 6x - 19 = 5**\n", 232 | "\\[ 6x = 5 + 19 \\]\n", 233 | "\\[ 6x = 24 \\]\n", 234 | "\\[ x = 4 \\]\n", 235 | "\n", 236 | "**(n) 3w + 4 = 43**\n", 237 | "\\[ 3w = 43 - 4 \\]\n", 238 | "\\[ 3w = 39 \\]\n", 239 | "\\[ w = 13 \\]\n", 240 | "\n", 241 | "**(o) \\frac{x}{3} + 1 = 5**\n", 242 | "\\[ \\frac{x}{3} = 5 - 1 \\]\n", 243 | "\\[ \\frac{x}{3} = 4 \\]\n", 244 | "\\[ x = 12 \\]\n", 245 | "\n", 246 | "**(p) \\frac{c}{2} - 4 = 6**\n", 247 | "\\[ \\frac{c}{2} = 6 + 4 \\]\n", 248 | "\\[ \\frac{c}{2} = 10 \\]\n", 249 | "\\[ c = 20 \\]\n", 250 | "\n", 251 | "**(q) \\frac{x}{10} + 3 = 9**\n", 252 | "\\[ \\frac{x}{10} = 9 - 3 \\]\n", 253 | "\\[ \\frac{x}{10} = 6 \\]\n", 254 | "\\[ x = 60 \\]\n", 255 | "\n", 256 | "**(r) \\frac{n}{9} - 8 = 1**\n", 257 | "\\[ \\frac{n}{9} = 1 + 8 \\]\n", 258 | "\\[ \\frac{n}{9} = 9 \\]\n", 259 | "\\[ n = 81 \\]\n", 260 | "\n", 261 | "These are the values of the variables for each equation.\n" 262 | ] 263 | } 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "source": [ 269 | "human_message = 
HumanMessage(content=[\n", 270 | " { \"type\": \"text\", \"text\": \"What shops are there in the image? How many people do you observe? What are they doing?\" },\n", 271 | " { \"type\": \"image_url\", \"image_url\": { \"url\": \"https://images.unsplash.com/photo-1715514894643-aedb49942811?q=80&w=3387&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D\" }}\n", 272 | "])\n", 273 | "response = llm.invoke([ human_message ])" 274 | ], 275 | "metadata": { 276 | "id": "TA4inoE-pE00" 277 | }, 278 | "execution_count": 36, 279 | "outputs": [] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "source": [ 284 | "print(response.content)" 285 | ], 286 | "metadata": { 287 | "colab": { 288 | "base_uri": "https://localhost:8080/" 289 | }, 290 | "id": "8WbSLsEbpFyb", 291 | "outputId": "3983da8e-76fa-437e-e7aa-b604a25edc2e" 292 | }, 293 | "execution_count": 37, 294 | "outputs": [ 295 | { 296 | "output_type": "stream", 297 | "name": "stdout", 298 | "text": [ 299 | "In the image, there are several shops, prominently featuring restaurants with Japanese signage. The shops include a Tonchin ramen restaurant identifiable by its yellow sign and a restaurant with a large sign featuring a chicken, possibly indicating a specialty in chicken dishes.\n", 300 | "\n", 301 | "There are at least five visible people in the image. Four people are inside the restaurant on the ground floor, seated and appearing to be eating or interacting with each other at the tables. 
Another person can be seen in a window above, likely a staff member or a chef, possibly preparing food or overseeing the kitchen.\n", 302 | "\n", 303 | "The scene captures a vibrant street life, typical of urban dining areas in Japan, with a focus on culinary experiences and social interactions.\n" 304 | ] 305 | } 306 | ] 307 | } 308 | ] 309 | } -------------------------------------------------------------------------------- /langgraph_nodes_edges.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyOcMk1b3oNBRsuZ7B6aSsFY", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 81, 32 | "metadata": { 33 | "id": "--5ZK7q9yi0l" 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "!pip install -q -U langchain langchain_openai langgraph google-search-results" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "source": [ 43 | "import os\n", 44 | "from google.colab import userdata" 45 | ], 46 | "metadata": { 47 | "id": "uwEe2bRl2OrV" 48 | }, 49 | "execution_count": 82, 50 | "outputs": [] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "source": [ 55 | "os.environ['SERPAPI_API_KEY'] = userdata.get('GOOGLE_API_KEY')\n", 56 | "os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n", 57 | "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", 58 | "os.environ[\"LANGCHAIN_PROJECT\"] = \"LangGraph\"\n", 59 | "os.environ[\"LANGCHAIN_API_KEY\"] = userdata.get('LANGSMITH_API_KEY')" 60 | ], 61 | "metadata": { 62 | "id": "TOLrjVQJ2Iyq" 63 | }, 64 | 
"execution_count": 83, 65 | "outputs": [] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "source": [ 70 | "from langchain_community.utilities import SerpAPIWrapper" 71 | ], 72 | "metadata": { 73 | "id": "7k4vCN42zAmb" 74 | }, 75 | "execution_count": 84, 76 | "outputs": [] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "source": [ 81 | "search = SerpAPIWrapper()\n", 82 | "\n", 83 | "search.run(\"Obama's first name?\")" 84 | ], 85 | "metadata": { 86 | "colab": { 87 | "base_uri": "https://localhost:8080/", 88 | "height": 36 89 | }, 90 | "id": "pI2sLQxlzDOK", 91 | "outputId": "c8065154-fe4a-4602-dcf8-4213d0c04fbc" 92 | }, 93 | "execution_count": 85, 94 | "outputs": [ 95 | { 96 | "output_type": "execute_result", 97 | "data": { 98 | "text/plain": [ 99 | "'Barack Hussein Obama II'" 100 | ], 101 | "application/vnd.google.colaboratory.intrinsic+json": { 102 | "type": "string" 103 | } 104 | }, 105 | "metadata": {}, 106 | "execution_count": 85 107 | } 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "source": [ 113 | "import functools, operator, requests, os, json\n", 114 | "from langchain.agents import AgentExecutor, create_openai_tools_agent\n", 115 | "from langchain_core.messages import BaseMessage, HumanMessage\n", 116 | "from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser\n", 117 | "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", 118 | "from langgraph.graph import StateGraph, END\n", 119 | "from langchain.tools import tool\n", 120 | "from langchain_openai import ChatOpenAI\n", 121 | "from typing import Annotated, Any, Dict, List, Optional, Sequence, TypedDict" 122 | ], 123 | "metadata": { 124 | "id": "tKCnieM72lBq" 125 | }, 126 | "execution_count": 86, 127 | "outputs": [] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "source": [ 132 | "llm = ChatOpenAI(model=\"gpt-4-turbo-preview\")" 133 | ], 134 | "metadata": { 135 | "id": "KEyXHFis2fC2" 136 | }, 137 | "execution_count": 87, 138 | "outputs": [] 139 | 
}, 140 | { 141 | "cell_type": "code", 142 | "source": [ 143 | "from langchain_core.messages import (\n", 144 | " AIMessage,\n", 145 | " BaseMessage,\n", 146 | " ChatMessage,\n", 147 | " FunctionMessage,\n", 148 | " HumanMessage,\n", 149 | " SystemMessage\n", 150 | ")\n", 151 | "\n", 152 | "@tool(\"web_search\")\n", 153 | "def web_search(query: str) -> str:\n", 154 | " \"\"\"Search with Google SERP API by a query\"\"\"\n", 155 | " search = SerpAPIWrapper()\n", 156 | " return search.run(query)\n", 157 | "\n", 158 | "@tool(\"twitter_writer\")\n", 159 | "def write_tweet(content: str) -> str:\n", 160 | " \"\"\"Based on a piece of content, write a tweet.\"\"\"\n", 161 | " chat = ChatOpenAI()\n", 162 | " messages = [\n", 163 | " SystemMessage(\n", 164 | " content=\"You are a Twitter account operator.\"\n", 165 | " \" You are responsible for writing a tweet based on the content given.\"\n", 166 | " \" You should follow the Twitter policy and make sure each tweet has no more than 140 characters.\"\n", 167 | " ),\n", 168 | " HumanMessage(\n", 169 | " content=content\n", 170 | " ),\n", 171 | " ]\n", 172 | " response = chat(messages)\n", 173 | " return response.content" 174 | ], 175 | "metadata": { 176 | "id": "fRUawUK622xO" 177 | }, 178 | "execution_count": 88, 179 | "outputs": [] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "source": [ 184 | "class AgentState(TypedDict):\n", 185 | " # The annotation tells the graph that new messages will always\n", 186 | " # be added to the current state\n", 187 | " messages: Annotated[Sequence[BaseMessage], operator.add]\n", 188 | " # The 'next' field indicates where to route to next\n", 189 | " next: str" 190 | ], 191 | "metadata": { 192 | "id": "dX9Q74L8IK8I" 193 | }, 194 | "execution_count": 90, 195 | "outputs": [] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "source": [ 200 | "def create_agent(llm: ChatOpenAI, tools: list, system_prompt: str):\n", 201 | " prompt = ChatPromptTemplate.from_messages(\n", 202 | " [\n", 203 | " (\n", 
204 | " \"system\",\n", 205 | " system_prompt,\n", 206 | " ),\n", 207 | " MessagesPlaceholder(variable_name=\"messages\"),\n", 208 | " MessagesPlaceholder(variable_name=\"agent_scratchpad\"),\n", 209 | " ]\n", 210 | " )\n", 211 | " agent = create_openai_tools_agent(llm, tools, prompt)\n", 212 | " executor = AgentExecutor(agent=agent, tools=tools)\n", 213 | " return executor\n", 214 | "\n", 215 | "def agent_node(state, agent, name):\n", 216 | " result = agent.invoke(state)\n", 217 | " return {\"messages\": [HumanMessage(content=result[\"output\"], name=name)]}" 218 | ], 219 | "metadata": { 220 | "id": "t_qXl_Hz49KR" 221 | }, 222 | "execution_count": 91, 223 | "outputs": [] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "source": [ 228 | "members = [\"Search_Engine\", \"Twitter_Writer\"]\n", 229 | "system_prompt = (\n", 230 | " \"You are a supervisor tasked with managing a conversation between the\"\n", 231 | " \" following workers: {members}. Given the following user request,\"\n", 232 | " \" respond with the worker to act next. Each worker will perform a\"\n", 233 | " \" task and respond with their results and status. 
When finished,\"\n", 234 | " \" respond with FINISH.\"\n", 235 | ")\n", 236 | "\n", 237 | "options = [\"FINISH\"] + members\n", 238 | "# Using openai function calling can make output parsing easier for us\n", 239 | "function_def = {\n", 240 | " \"name\": \"route\",\n", 241 | " \"description\": \"Select the next role.\",\n", 242 | " \"parameters\": {\n", 243 | " \"title\": \"routeSchema\",\n", 244 | " \"type\": \"object\",\n", 245 | " \"properties\": {\n", 246 | " \"next\": {\n", 247 | " \"title\": \"Next\",\n", 248 | " \"anyOf\": [\n", 249 | " {\"enum\": options},\n", 250 | " ],\n", 251 | " }\n", 252 | " },\n", 253 | " \"required\": [\"next\"],\n", 254 | " },\n", 255 | "}\n", 256 | "prompt = ChatPromptTemplate.from_messages(\n", 257 | " [\n", 258 | " (\"system\", system_prompt),\n", 259 | " MessagesPlaceholder(variable_name=\"messages\"),\n", 260 | " (\n", 261 | " \"system\",\n", 262 | " \"Given the conversation above, who should act next?\"\n", 263 | " \" Or should we FINISH? Select one of: {options}\",\n", 264 | " ),\n", 265 | " ]\n", 266 | ").partial(options=str(options), members=\", \".join(members))\n", 267 | "\n", 268 | "supervisor_chain = (\n", 269 | " prompt\n", 270 | " | llm.bind_functions(functions=[function_def], function_call=\"route\")\n", 271 | " | JsonOutputFunctionsParser()\n", 272 | ")" 273 | ], 274 | "metadata": { 275 | "id": "5KECklFd5z1J" 276 | }, 277 | "execution_count": 92, 278 | "outputs": [] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "source": [ 283 | "search_engine_agent = create_agent(llm, [web_search], \"You are a web search engine.\")\n", 284 | "search_engine_node = functools.partial(agent_node, agent=search_engine_agent, name=\"Search_Engine\")\n", 285 | "\n", 286 | "twitter_operator_agent = create_agent(llm, [write_tweet], \"You are responsible for writing a tweet based on the content given.\")\n", 287 | "twitter_operator_node = functools.partial(agent_node, agent=twitter_operator_agent, name=\"Twitter_Writer\")\n", 288 | "\n", 
289 | "workflow = StateGraph(AgentState)\n", 290 | "workflow.add_node(\"Search_Engine\", search_engine_node)\n", 291 | "workflow.add_node(\"Twitter_Writer\", twitter_operator_node)\n", 292 | "workflow.add_node(\"supervisor\", supervisor_chain)" 293 | ], 294 | "metadata": { 295 | "id": "vZID5tQv8E3n" 296 | }, 297 | "execution_count": 93, 298 | "outputs": [] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "source": [ 303 | "for member in members:\n", 304 | " workflow.add_edge(member, \"supervisor\")\n", 305 | "\n", 306 | "conditional_map = {k: k for k in members}\n", 307 | "conditional_map[\"FINISH\"] = END\n", 308 | "workflow.add_conditional_edges(\"supervisor\", lambda x: x[\"next\"], conditional_map)\n", 309 | "\n", 310 | "workflow.set_entry_point(\"supervisor\")\n", 311 | "\n", 312 | "graph = workflow.compile()" 313 | ], 314 | "metadata": { 315 | "id": "LWBp9K739gt8" 316 | }, 317 | "execution_count": 94, 318 | "outputs": [] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "source": [ 323 | "for s in graph.stream(\n", 324 | " {\n", 325 | " \"messages\": [\n", 326 | " HumanMessage(content=\"Write a tweet about LangChain news\")\n", 327 | " ]\n", 328 | " }\n", 329 | "):\n", 330 | " if \"__end__\" not in s:\n", 331 | " print(s)\n", 332 | " print(\"----\")" 333 | ], 334 | "metadata": { 335 | "colab": { 336 | "base_uri": "https://localhost:8080/" 337 | }, 338 | "id": "e9qi4pAX96Tf", 339 | "outputId": "f801546e-8406-48ab-cd35-bb567df5a5b2" 340 | }, 341 | "execution_count": 95, 342 | "outputs": [ 343 | { 344 | "output_type": "stream", 345 | "name": "stdout", 346 | "text": [ 347 | "{'supervisor': {'next': 'Search_Engine'}}\n", 348 | "----\n", 349 | "{'Search_Engine': {'messages': [HumanMessage(content=\"🚀 Exciting news from LangChain! 🌟\\n\\nWe just launched LangGraph, a revolutionary tool to customize your Agent Runtime, marking a significant milestone in our journey. 
Also, we're thrilled to announce the release of langchain 0.1.0, our first stable version that's fully backward compatible. 🎉\\n\\nStay tuned for more updates on how we're transforming the AI ecosystem. #LangChain #Innovation #AI\\n\\n[Week of 1/22/24]\", name='Search_Engine')]}}\n", 350 | "----\n", 351 | "{'supervisor': {'next': 'Twitter_Writer'}}\n", 352 | "----\n", 353 | "{'Twitter_Writer': {'messages': [HumanMessage(content='🚀 Exciting news from LangChain! 🌟 Introducing LangGraph, a customizable Agent Runtime tool, and langchain 0.1.0, our first stable release. #LangChain #Innovation #AI', name='Twitter_Writer')]}}\n", 354 | "----\n", 355 | "{'supervisor': {'next': 'FINISH'}}\n", 356 | "----\n" 357 | ] 358 | } 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "source": [], 364 | "metadata": { 365 | "id": "_SRgFulWAQ9R" 366 | }, 367 | "execution_count": 95, 368 | "outputs": [] 369 | } 370 | ] 371 | } --------------------------------------------------------------------------------