├── .gitignore
├── README.md
├── notebooks
    ├── Chapter 02 - Transformers_Architectures.ipynb
    ├── Chapter 02 - Understanding_Transformer.ipynb
    ├── Chapter 04 - Intro_to_Prompt_Engineering_Tips_and_Tricks.ipynb
    ├── Chapter 05 - Build_a_News_Articles_Summarizer.ipynb
    ├── Chapter 05 - Building a basic RAG pipeline  from scratch.ipynb
    ├── Chapter 05 - Building_Applications_Powered_by_LLMs_with_LangChain.ipynb
    ├── Chapter 05 - LlamaIndex_Introduction.ipynb
    ├── Chapter 06 - Creating_Knowledge_Graphs_from_Textual_Data_Unveiling_Hidden_Connections.ipynb
    ├── Chapter 06 - Getting_the_Best_of_Few_Shot_Prompts_and_Example_Selectors.ipynb
    ├── Chapter 06 - Improving_Our_News_Articles_Summarizer.ipynb
    ├── Chapter 06 - Managing_Outputs_with_Output_Parsers.ipynb
    ├── Chapter 06 - Using_Prompt_Templates.ipynb
    ├── Chapter 07 - Chains_and_Why_They_Are_Used.ipynb
    ├── Chapter 07 - Create_a_YouTube_Video_Summarizer_Using_Whisper_and_LangChain_.ipynb
    ├── Chapter 07 - Guarding_Against_Undesirable_Outputs_with_the_Self_Critique_Chain.ipynb
    ├── Chapter 07 - Guarding_Against_Undesirable_Outputs_with_the_Self_Critique_Chain_Example.ipynb
    ├── Chapter 07 - What_are_Text_Splitters_and_Why_They_are_Useful_.ipynb
    ├── Chapter 08 - LangSmith_Introduction.ipynb
    ├── Chapter 08 - Mastering_Advanced_RAG.ipynb
    ├── Chapter 08 - RAG_Metrics&Evaluation.ipynb
    ├── Chapter 09 - Building Agents for Analysis Report Creation.ipynb
    ├── Chapter 09 - Building Agents with OpenAI Assistants.ipynb
    ├── Chapter 09 - LlamaIndex_RAG_AGENT.ipynb
    ├── Chapter 09 - MultiModal_Fincance_+_DeepMemory.ipynb
    ├── Chapter 09 - Query and Zummarize a DB with LlamaIndex.ipynb
    ├── Chapter 09 - Using_AutoGPT_with_LangChain.ipynb
    ├── Chapter 10 - Create_Dataset_For_Cohere_Fine_Tuning.ipynb
    ├── Chapter 10 - FineTune_RLHF.ipynb
    ├── Chapter 10 - FineTuning_Reward_Model.ipynb
    ├── Chapter 10 - FineTuning_a_LLM_Financial_Sentiment_CPU.ipynb
    ├── Chapter 10 - FineTuning_a_LLM_LIMA_CPU.ipynb
    ├── Chapter 10 - FineTuning_a_LLM_QLoRA.ipynb
    ├── Chapter 10 - Fine_Tuning_using_Cohere_for_Medical_Data.ipynb
    └── Chapter 11 - Benchmark_Inference.ipynb
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ragbook-notebooks
 2 | This is a repository gathering all the notebooks for the Towards AI RAG book.
 3 | 
 4 | ## Chapter 2
 5 | - [Transformers_Architectures.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2002%20-%20Transformers_Architectures.ipynb)
 6 | - [Understanding_Transformer.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2002%20-%20Understanding_Transformer.ipynb)
 7 | 
 8 | ## Chapter 4
 9 | - [Intro_to_Prompt_Engineering_Tips_and_Tricks.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2004%20-%20Intro_to_Prompt_Engineering_Tips_and_Tricks.ipynb)
10 | 
11 | ## Chapter 5
12 | - [Building_Applications_Powered_by_LLMs_with_LangChain.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2005%20-%20Building_Applications_Powered_by_LLMs_with_LangChain.ipynb)
13 | - [Build_a_News_Articles_Summarizer.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2005%20-%20Build_a_News_Articles_Summarizer.ipynb)
14 | - [LlamaIndex_Introduction.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2005%20-%20LlamaIndex_Introduction.ipynb)
15 | 
16 | ## Chapter 6
17 | - [Using_Prompt_Templates.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Using_Prompt_Templates.ipynb)
18 | - [Getting_the_Best_of_Few_Shot_Prompts_and_Example_Selectors.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Getting_the_Best_of_Few_Shot_Prompts_and_Example_Selectors.ipynb)
19 | - [Managing_Outputs_with_Output_Parsers.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Managing_Outputs_with_Output_Parsers.ipynb)
20 | - [Improving_Our_News_Articles_Summarizer.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Improving_Our_News_Articles_Summarizer.ipynb)
21 | - [Creating_Knowledge_Graphs_from_Textual_Data_Unveiling_Hidden_Connections.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Creating_Knowledge_Graphs_from_Textual_Data_Unveiling_Hidden_Connections.ipynb)
22 | 
23 | ## Chapter 7
24 | - [What_are_Text_Splitters_and_Why_They_are_Useful_.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2007%20-%20What_are_Text_Splitters_and_Why_They_are_Useful_.ipynb)
25 | - [Chains_and_Why_They_Are_Used.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2007%20-%20Chains_and_Why_They_Are_Used.ipynb)
26 | - [Create_a_YouTube_Video_Summarizer_Using_Whisper_and_LangChain_.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2007%20-%20Create_a_YouTube_Video_Summarizer_Using_Whisper_and_LangChain_.ipynb)
27 | - [Guarding_Against_Undesirable_Outputs_with_the_Self_Critique_Chain.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2007%20-%20Guarding_Against_Undesirable_Outputs_with_the_Self_Critique_Chain.ipynb)
28 | - [Guarding_Against_Undesirable_Outputs_with_the_Self_Critique_Chain_Example.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2007%20-%20Guarding_Against_Undesirable_Outputs_with_the_Self_Critique_Chain_Example.ipynb)
29 | 
30 | ## Chapter 8
31 | - [Mastering_Advanced_RAG.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2008%20-%20Mastering_Advanced_RAG.ipynb)
32 | - [RAG_Metrics&Evaluation.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2008%20-%20RAG_Metrics%26Evaluation.ipynb)
33 | - [LangSmith_Introduction.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2008%20-%20LangSmith_Introduction.ipynb)
34 | 
35 | ## Chapter 9
36 | - [Building Agents for Analysis Report Creation](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2009%20-%20Building%20Agents%20for%20Analysis%20Report%20Creation.ipynb)
37 | - [Query and Zummarize a DB with LlamaIndex](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2009%20-%20Query%20and%20Zummarize%20a%20DB%20with%20LlamaIndex.ipynb)
38 | - [Building Agents with OpenAI Assistants](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2009%20-%20Building%20Agents%20with%20OpenAI%20Assistants.ipynb)
39 | - [Using_AutoGPT_with_LangChain.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2009%20-%20Using_AutoGPT_with_LangChain.ipynb)
40 | - [LlamaIndex_RAG_AGENT.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2009%20-%20LlamaIndex_RAG_AGENT.ipynb)
41 | - [MultiModal_Fincance_+_DeepMemory.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2009%20-%20MultiModal_Fincance_%2B_DeepMemory.ipynb)
42 | 
43 | ## Chapter 10
44 | - [FineTuning_a_LLM_LIMA_CPU.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2010%20-%20FineTuning_a_LLM_LIMA_CPU.ipynb)
45 | - [FineTuning_a_LLM_Financial_Sentiment_CPU.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2010%20-%20FineTuning_a_LLM_Financial_Sentiment_CPU.ipynb)
46 | - [Create_Dataset_For_Cohere_Fine_Tuning.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2010%20-%20Create_Dataset_For_Cohere_Fine_Tuning.ipynb)
47 | - [Fine_Tuning_using_Cohere_for_Medical_Data.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2010%20-%20Fine_Tuning_using_Cohere_for_Medical_Data.ipynb)
48 | - [FineTuning_a_LLM_QLoRA.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2010%20-%20FineTuning_a_LLM_QLoRA.ipynb)
49 | - [FineTuning_Reward_Model.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2010%20-%20FineTuning_Reward_Model.ipynb)
50 | - [FineTune_RLHF.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2010%20-%20FineTune_RLHF.ipynb)
51 | 
52 | ## Chapter 11
53 | - [Benchmark_Inference.ipynb](https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2011%20-%20Benchmark_Inference.ipynb)
54 | 


--------------------------------------------------------------------------------
/notebooks/Chapter 05 - Build_a_News_Articles_Summarizer.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "colab_type": "text",
  7 |         "id": "view-in-github"
  8 |       },
  9 |       "source": [
 10 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2005%20-%20Build_a_News_Articles_Summarizer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 11 |       ]
 12 |     },
 13 |     {
 14 |       "cell_type": "code",
 15 |       "execution_count": null,
 16 |       "metadata": {
 17 |         "id": "YQVIcL2LWULJ"
 18 |       },
 19 |       "outputs": [],
 20 |       "source": [
 21 |         "!pip install -q langchain==0.0.208 openai==0.27.8 python-dotenv newspaper3k"
 22 |       ]
 23 |     },
 24 |     {
 25 |       "cell_type": "code",
 26 |       "execution_count": null,
 27 |       "metadata": {
 28 |         "colab": {
 29 |           "base_uri": "https://localhost:8080/"
 30 |         },
 31 |         "id": "bDuYoCMhWW_x",
 32 |         "outputId": "a766448f-3810-43d7-acc2-c17f1ce61c50"
 33 |       },
 34 |       "outputs": [
 35 |         {
 36 |           "data": {
 37 |             "text/plain": [
 38 |               "True"
 39 |             ]
 40 |           },
 41 |           "execution_count": 2,
 42 |           "metadata": {},
 43 |           "output_type": "execute_result"
 44 |         }
 45 |       ],
 46 |       "source": [
 47 |         "from dotenv import load_dotenv\n",
 48 |         "\n",
 49 |         "!echo \"OPENAI_API_KEY='<OPENAI_API_KEY>'\" > .env\n",
 50 |         "\n",
 51 |         "load_dotenv()"
 52 |       ]
 53 |     },
 54 |     {
 55 |       "cell_type": "code",
 56 |       "execution_count": null,
 57 |       "metadata": {
 58 |         "colab": {
 59 |           "base_uri": "https://localhost:8080/"
 60 |         },
 61 |         "id": "_6rH09GpWyP_",
 62 |         "outputId": "566117d6-04d6-494d-e234-b14c582e3d1d"
 63 |       },
 64 |       "outputs": [
 65 |         {
 66 |           "name": "stdout",
 67 |           "output_type": "stream",
 68 |           "text": [
 69 |             "Title: Meta claims its new AI supercomputer will set records\n",
 70 |             "Text: Ryan is a senior editor at TechForge Media with over a decade of experience covering the latest technology and interviewing leading industry figures. He can often be sighted at tech conferences with a strong coffee in one hand and a laptop in the other. If it's geeky, he’s probably into it. Find him on Twitter (@Gadget_Ry) or Mastodon (@gadgetry@techhub.social)\n",
 71 |             "\n",
 72 |             "Meta (formerly Facebook) has unveiled an AI supercomputer that it claims will be the world’s fastest.\n",
 73 |             "\n",
 74 |             "The supercomputer is called the AI Research SuperCluster (RSC) and is yet to be fully complete. However, Meta’s researchers have already begun using it for training large natural language processing (NLP) and computer vision models.\n",
 75 |             "\n",
 76 |             "RSC is set to be fully built in mid-2022. Meta says that it will be the fastest in the world once complete and the aim is for it to be capable of training models with trillions of parameters.\n",
 77 |             "\n",
 78 |             "“We hope RSC will help us build entirely new AI systems that can, for example, power real-time voice translations to large groups of people, each speaking a different language, so they can seamlessly collaborate on a research project or play an AR game together,” wrote Meta in a blog post.\n",
 79 |             "\n",
 80 |             "“Ultimately, the work done with RSC will pave the way toward building technologies for the next major computing platform — the metaverse, where AI-driven applications and products will play an important role.”\n",
 81 |             "\n",
 82 |             "For production, Meta expects RSC will be 20x faster than Meta’s current V100-based clusters. RSC is also estimated to be 9x faster at running the NVIDIA Collective Communication Library (NCCL) and 3x faster at training large-scale NLP workflows.\n",
 83 |             "\n",
 84 |             "A model with tens of billions of parameters can finish training in three weeks compared with nine weeks prior to RSC.\n",
 85 |             "\n",
 86 |             "Meta says that its previous AI research infrastructure only leveraged open source and other publicly-available datasets. RSC was designed with the security and privacy controls in mind to allow Meta to use real-world examples from its production systems in production training.\n",
 87 |             "\n",
 88 |             "What this means in practice is that Meta can use RSC to advance research for vital tasks such as identifying harmful content on its platforms—using real data from them.\n",
 89 |             "\n",
 90 |             "“We believe this is the first time performance, reliability, security, and privacy have been tackled at such a scale,” says Meta.\n",
 91 |             "\n",
 92 |             "(Image Credit: Meta)\n",
 93 |             "\n",
 94 |             "Want to learn more about AI and big data from industry leaders? Check out AI & Big Data Expo. The next events in the series will be held in Santa Clara on 11-12 May 2022, Amsterdam on 20-21 September 2022, and London on 1-2 December 2022.\n",
 95 |             "\n",
 96 |             "Explore other upcoming enterprise technology events and webinars powered by TechForge here.\n"
 97 |           ]
 98 |         }
 99 |       ],
100 |       "source": [
101 |         "import requests\n",
102 |         "from newspaper import Article\n",
103 |         "\n",
104 |         "headers = {\n",
105 |         "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'\n",
106 |         "}\n",
107 |         "\n",
108 |         "article_urls = \"https://www.artificialintelligence-news.com/2022/01/25/meta-claims-new-ai-supercomputer-will-set-records/\"\n",
109 |         "\n",
110 |         "session = requests.Session()\n",
111 |         "\n",
112 |         "try:\n",
113 |         "    response = session.get(article_urls, headers=headers, timeout=10)\n",
114 |         "\n",
115 |         "    if response.status_code == 200:\n",
116 |         "        article = Article(article_urls)\n",
117 |         "        article.download()\n",
118 |         "        article.parse()\n",
119 |         "\n",
120 |         "        print(f\"Title: {article.title}\")\n",
121 |         "        print(f\"Text: {article.text}\")\n",
122 |         "\n",
123 |         "    else:\n",
124 |         "        print(f\"Failed to fetch article at {article_urls}\")\n",
125 |         "except Exception as e:\n",
126 |         "    print(f\"Error occurred while fetching article at {article_urls}: {e}\")"
127 |       ]
128 |     },
129 |     {
130 |       "cell_type": "code",
131 |       "execution_count": null,
132 |       "metadata": {
133 |         "id": "-neGI_O-WyH5"
134 |       },
135 |       "outputs": [],
136 |       "source": [
137 |         "from langchain.schema import (\n",
138 |         "    HumanMessage\n",
139 |         ")\n",
140 |         "\n",
141 |         "# we get the article data from the scraping part\n",
142 |         "article_title = article.title\n",
143 |         "article_text = article.text\n",
144 |         "\n",
145 |         "# prepare template for prompt\n",
146 |         "template = \"\"\"You are a very good assistant that summarizes online articles.\n",
147 |         "\n",
148 |         "Here's the article you want to summarize.\n",
149 |         "\n",
150 |         "==================\n",
151 |         "Title: {article_title}\n",
152 |         "\n",
153 |         "{article_text}\n",
154 |         "==================\n",
155 |         "\n",
156 |         "Write a summary of the previous article.\n",
157 |         "\"\"\"\n",
158 |         "\n",
159 |         "prompt = template.format(article_title=article.title, article_text=article.text)\n",
160 |         "\n",
161 |         "messages = [HumanMessage(content=prompt)]"
162 |       ]
163 |     },
164 |     {
165 |       "cell_type": "code",
166 |       "execution_count": null,
167 |       "metadata": {
168 |         "id": "UYRzjToAXDUe"
169 |       },
170 |       "outputs": [],
171 |       "source": [
172 |         "from langchain.chat_models import ChatOpenAI\n",
173 |         "\n",
174 |         "# load the model\n",
175 |         "chat = ChatOpenAI(model_name=\"gpt-4-turbo\", temperature=0)"
176 |       ]
177 |     },
178 |     {
179 |       "cell_type": "code",
180 |       "execution_count": null,
181 |       "metadata": {
182 |         "colab": {
183 |           "base_uri": "https://localhost:8080/"
184 |         },
185 |         "id": "hVMxNufYXMek",
186 |         "outputId": "05d74e4c-d8cf-47e4-d2a2-1510c8fd1687"
187 |       },
188 |       "outputs": [
189 |         {
190 |           "name": "stdout",
191 |           "output_type": "stream",
192 |           "text": [
193 |             "Meta, formerly known as Facebook, has announced the development of an AI supercomputer called the AI Research SuperCluster (RSC). The supercomputer is expected to be completed by mid-2022 and aims to be the world's fastest, capable of training models with trillions of parameters. Meta's researchers are already using RSC for training large natural language processing and computer vision models. The company hopes that RSC will help build new AI systems for real-time voice translations and contribute to the development of the metaverse. RSC is designed with security and privacy controls, allowing Meta to use real-world examples from its production systems for training.\n"
194 |           ]
195 |         }
196 |       ],
197 |       "source": [
198 |         "# generate summary\n",
199 |         "summary = chat(messages)\n",
200 |         "print(summary.content)"
201 |       ]
202 |     },
203 |     {
204 |       "cell_type": "code",
205 |       "execution_count": null,
206 |       "metadata": {
207 |         "colab": {
208 |           "base_uri": "https://localhost:8080/"
209 |         },
210 |         "id": "l3LoDUVTXNJz",
211 |         "outputId": "30c511d5-b63e-49f3-b527-11e278675338"
212 |       },
213 |       "outputs": [
214 |         {
215 |           "name": "stdout",
216 |           "output_type": "stream",
217 |           "text": [
218 |             "- Meta (formerly Facebook) unveils AI Research SuperCluster (RSC), an AI supercomputer.\n",
219 |             "- RSC is claimed to be the world's fastest once fully built in mid-2022.\n",
220 |             "- Researchers have already started using RSC for training large NLP and computer vision models.\n",
221 |             "- The supercomputer aims to train models with trillions of parameters.\n",
222 |             "- RSC will help build AI systems for real-time voice translations and metaverse applications.\n",
223 |             "- Meta expects RSC to be 20x faster than its current V100-based clusters.\n",
224 |             "- RSC is designed with security and privacy controls to use real-world examples from Meta's production systems.\n",
225 |             "- The supercomputer will advance research for tasks like identifying harmful content on Meta's platforms.\n"
226 |           ]
227 |         }
228 |       ],
229 |       "source": [
230 |         "# prepare template for prompt\n",
231 |         "template = \"\"\"You are an advanced AI assistant that summarizes online articles into bulleted lists.\n",
232 |         "\n",
233 |         "Here's the article you need to summarize.\n",
234 |         "\n",
235 |         "==================\n",
236 |         "Title: {article_title}\n",
237 |         "\n",
238 |         "{article_text}\n",
239 |         "==================\n",
240 |         "\n",
241 |         "Now, provide a summarized version of the article in a bulleted list format.\n",
242 |         "\"\"\"\n",
243 |         "\n",
244 |         "# format prompt\n",
245 |         "prompt = template.format(article_title=article.title, article_text=article.text)\n",
246 |         "\n",
247 |         "# generate summary\n",
248 |         "summary = chat([HumanMessage(content=prompt)])\n",
249 |         "print(summary.content)"
250 |       ]
251 |     },
252 |     {
253 |       "cell_type": "code",
254 |       "execution_count": null,
255 |       "metadata": {
256 |         "colab": {
257 |           "base_uri": "https://localhost:8080/"
258 |         },
259 |         "id": "PPwrM1PWXtBr",
260 |         "outputId": "b6910bb9-2413-4b1f-f292-76eb52bd7142"
261 |       },
262 |       "outputs": [
263 |         {
264 |           "name": "stdout",
265 |           "output_type": "stream",
266 |           "text": [
267 |             "- Meta (anciennement Facebook) dévoile un superordinateur IA qu'elle prétend être le plus rapide au monde.\n",
268 |             "- Le superordinateur s'appelle AI Research SuperCluster (RSC) et n'est pas encore totalement achevé.\n",
269 |             "- Les chercheurs de Meta l'utilisent déjà pour entraîner de grands modèles de traitement du langage naturel (NLP) et de vision par ordinateur.\n",
270 |             "- RSC devrait être entièrement construit d'ici mi-2022 et visera à entraîner des modèles avec des billions de paramètres.\n",
271 |             "- Meta espère que RSC permettra de créer de nouveaux systèmes d'IA pour des applications telles que la traduction vocale en temps réel pour des groupes de personnes parlant différentes langues.\n",
272 |             "- Pour la production, RSC devrait être 20 fois plus rapide que les clusters actuels de Meta basés sur V100.\n",
273 |             "- RSC est également estimé être 9 fois plus rapide pour exécuter la bibliothèque de communication collective NVIDIA (NCCL) et 3 fois plus rapide pour entraîner des flux de travail NLP à grande échelle.\n",
274 |             "- Un modèle avec des dizaines de milliards de paramètres peut terminer sa formation en trois semaines avec RSC, contre neuf semaines auparavant.\n",
275 |             "- Meta affirme que RSC a été conçu avec la sécurité et la confidentialité à l'esprit pour permettre d'utiliser des exemples réels de ses systèmes de production dans la formation.\n",
276 |             "- Cela signifie que Meta peut utiliser RSC pour faire progresser la recherche sur des tâches essentielles, telles que l'identification de contenus nuisibles sur ses plateformes, en utilisant des données réelles provenant de celles-ci.\n"
277 |           ]
278 |         }
279 |       ],
280 |       "source": [
281 |         "# prepare template for prompt\n",
282 |         "template = \"\"\"You are an advanced AI assistant that summarizes online articles into bulleted lists in French.\n",
283 |         "\n",
284 |         "Here's the article you need to summarize.\n",
285 |         "\n",
286 |         "==================\n",
287 |         "Title: {article_title}\n",
288 |         "\n",
289 |         "{article_text}\n",
290 |         "==================\n",
291 |         "\n",
292 |         "Now, provide a summarized version of the article in a bulleted list format, in French.\n",
293 |         "\"\"\"\n",
294 |         "\n",
295 |         "# format prompt\n",
296 |         "prompt = template.format(article_title=article.title, article_text=article.text)\n",
297 |         "\n",
298 |         "# generate summary\n",
299 |         "summary = chat([HumanMessage(content=prompt)])\n",
300 |         "print(summary.content)"
301 |       ]
302 |     },
303 |     {
304 |       "cell_type": "code",
305 |       "execution_count": null,
306 |       "metadata": {
307 |         "id": "Mrx20THKYkA4"
308 |       },
309 |       "outputs": [],
310 |       "source": []
311 |     }
312 |   ],
313 |   "metadata": {
314 |     "colab": {
315 |       "authorship_tag": "ABX9TyOE6HvJTBPWvSV1/42PcYTJ",
316 |       "include_colab_link": true,
317 |       "provenance": []
318 |     },
319 |     "kernelspec": {
320 |       "display_name": "Python 3",
321 |       "name": "python3"
322 |     },
323 |     "language_info": {
324 |       "name": "python"
325 |     }
326 |   },
327 |   "nbformat": 4,
328 |   "nbformat_minor": 0
329 | }
330 | 


--------------------------------------------------------------------------------
/notebooks/Chapter 05 - Building_Applications_Powered_by_LLMs_with_LangChain.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "colab_type": "text",
  7 |         "id": "view-in-github"
  8 |       },
  9 |       "source": [
 10 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2005%20-%20Building_Applications_Powered_by_LLMs_with_LangChain.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 11 |       ]
 12 |     },
 13 |     {
 14 |       "cell_type": "code",
 15 |       "execution_count": null,
 16 |       "metadata": {
 17 |         "id": "LNoTP56OxHgr"
 18 |       },
 19 |       "outputs": [],
 20 |       "source": [
 21 |         "!pip install -q langchain==0.0.208 openai==0.27.8 python-dotenv pypdf"
 22 |       ]
 23 |     },
 24 |     {
 25 |       "cell_type": "code",
 26 |       "execution_count": null,
 27 |       "metadata": {
 28 |         "colab": {
 29 |           "base_uri": "https://localhost:8080/"
 30 |         },
 31 |         "id": "Kbp_3cG1xLZF",
 32 |         "outputId": "85a50a5b-12f9-4a8f-8180-3ca920841174"
 33 |       },
 34 |       "outputs": [
 35 |         {
 36 |           "data": {
 37 |             "text/plain": [
 38 |               "True"
 39 |             ]
 40 |           },
 41 |           "execution_count": 2,
 42 |           "metadata": {},
 43 |           "output_type": "execute_result"
 44 |         }
 45 |       ],
 46 |       "source": [
 47 |         "from dotenv import load_dotenv\n",
 48 |         "\n",
 49 |         "!echo \"OPENAI_API_KEY='<OPENAI_API_KEY>'\" > .env\n",
 50 |         "\n",
 51 |         "load_dotenv()"
 52 |       ]
 53 |     },
 54 |     {
 55 |       "cell_type": "code",
 56 |       "execution_count": null,
 57 |       "metadata": {
 58 |         "colab": {
 59 |           "base_uri": "https://localhost:8080/"
 60 |         },
 61 |         "id": "B9hBmX2axisi",
 62 |         "outputId": "96b998c4-3bbc-4f59-b621-5e4200665c76"
 63 |       },
 64 |       "outputs": [
 65 |         {
 66 |           "name": "stdout",
 67 |           "output_type": "stream",
 68 |           "text": [
 69 |             "\"Inception\" is a 2010 science fiction action film directed by Christopher Nolan. The movie stars Leonardo DiCaprio, Joseph Gordon-Levitt, Ellen Page, Tom Hardy, and Ken Watanabe. The story follows a thief who enters the dreams of others to steal their secrets. The film explores the concept of dream manipulation and features mind-bending visual effects. \"Inception\" received critical acclaim for its originality, storytelling, and visual effects. It was also a commercial success, grossing over $800 million worldwide.\n"
 70 |           ]
 71 |         }
 72 |       ],
 73 |       "source": [
 74 |         "from langchain.chat_models import ChatOpenAI\n",
 75 |         "from langchain.prompts.chat import (\n",
 76 |         "    ChatPromptTemplate,\n",
 77 |         "    SystemMessagePromptTemplate,\n",
 78 |         "    HumanMessagePromptTemplate,\n",
 79 |         ")\n",
 80 |         "\n",
 81 |         "chat = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
 82 |         "\n",
 83 |         "template = \"You are an assistant that helps users find information about movies.\"\n",
 84 |         "system_message_prompt = SystemMessagePromptTemplate.from_template(template)\n",
 85 |         "human_template = \"Find information about the movie {movie_title}.\"\n",
 86 |         "human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)\n",
 87 |         "\n",
 88 |         "chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])\n",
 89 |         "\n",
 90 |         "response = chat(chat_prompt.format_prompt(movie_title=\"Inception\").to_messages())\n",
 91 |         "\n",
 92 |         "print(response.content)"
 93 |       ]
 94 |     },
 95 |     {
 96 |       "cell_type": "markdown",
 97 |       "metadata": {
 98 |         "id": "ct6UVdwoycvq"
 99 |       },
100 |       "source": [
101 |         "# ===="
102 |       ]
103 |     },
104 |     {
105 |       "cell_type": "code",
106 |       "execution_count": null,
107 |       "metadata": {
108 |         "id": "8Aq0Gw1Ux5E5"
109 |       },
110 |       "outputs": [],
111 |       "source": [
112 |         "# Import necessary modules\n",
113 |         "from langchain.chat_models import ChatOpenAI\n",
114 |         "from langchain import PromptTemplate\n",
115 |         "from langchain.chains.summarize import load_summarize_chain\n",
116 |         "from langchain.document_loaders import PyPDFLoader"
117 |       ]
118 |     },
119 |     {
120 |       "cell_type": "code",
121 |       "execution_count": null,
122 |       "metadata": {
123 |         "id": "HiLQa1ExPY3q"
124 |       },
125 |       "outputs": [],
126 |       "source": [
127 |         "# Initialize language model\n",
128 |         "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)"
129 |       ]
130 |     },
131 |     {
132 |       "cell_type": "code",
133 |       "execution_count": null,
134 |       "metadata": {
135 |         "id": "CFNqeufiyj7a"
136 |       },
137 |       "outputs": [],
138 |       "source": [
139 |         "# Load the summarization chain\n",
140 |         "summarize_chain = load_summarize_chain(llm)"
141 |       ]
142 |     },
143 |     {
144 |       "cell_type": "markdown",
145 |       "metadata": {
146 |         "id": "HWZchXOizjCq"
147 |       },
148 |       "source": [
149 |         "Download file from: https://www.cheat-sheets.org/saved-copy/The%20One%20Page%20Linux%20Manual.pdf"
150 |       ]
151 |     },
152 |     {
153 |       "cell_type": "code",
154 |       "execution_count": null,
155 |       "metadata": {
156 |         "colab": {
157 |           "base_uri": "https://localhost:8080/"
158 |         },
159 |         "id": "xdfEiS0AQPXq",
160 |         "outputId": "558e21c0-e998-4e5c-c548-55a5fd9b0e06"
161 |       },
162 |       "outputs": [
163 |         {
164 |           "name": "stdout",
165 |           "output_type": "stream",
166 |           "text": [
167 |             "--2024-01-30 16:45:28--  https://www.cheat-sheets.org/saved-copy/The%20One%20Page%20Linux%20Manual.pdf\n",
168 |             "Resolving www.cheat-sheets.org (www.cheat-sheets.org)... 90.156.201.26, 90.156.201.114, 90.156.201.28, ...\n",
169 |             "Connecting to www.cheat-sheets.org (www.cheat-sheets.org)|90.156.201.26|:443... connected.\n",
170 |             "HTTP request sent, awaiting response... 200 OK\n",
171 |             "Length: 96538 (94K) [application/pdf]\n",
172 |             "Saving to: ‘The One Page Linux Manual.pdf’\n",
173 |             "\n",
174 |             "The One Page Linux  100%[===================>]  94.28K   327KB/s    in 0.3s    \n",
175 |             "\n",
176 |             "2024-01-30 16:45:29 (327 KB/s) - ‘The One Page Linux Manual.pdf’ saved [96538/96538]\n",
177 |             "\n"
178 |           ]
179 |         }
180 |       ],
181 |       "source": [
182 |         "!wget https://www.cheat-sheets.org/saved-copy/The%20One%20Page%20Linux%20Manual.pdf"
183 |       ]
184 |     },
185 |     {
186 |       "cell_type": "code",
187 |       "execution_count": null,
188 |       "metadata": {
189 |         "id": "cC4g9vFgyqSa"
190 |       },
191 |       "outputs": [],
192 |       "source": [
193 |         "# Load the document using PyPDFLoader\n",
194 |         "document_loader = PyPDFLoader(file_path=\"./The One Page Linux Manual.pdf\")\n",
195 |         "document = document_loader.load()"
196 |       ]
197 |     },
198 |     {
199 |       "cell_type": "code",
200 |       "execution_count": null,
201 |       "metadata": {
202 |         "id": "fauNaJMbytqk"
203 |       },
204 |       "outputs": [],
205 |       "source": [
206 |         "# Summarize the document\n",
207 |         "summary = summarize_chain(document)"
208 |       ]
209 |     },
210 |     {
211 |       "cell_type": "code",
212 |       "execution_count": null,
213 |       "metadata": {
214 |         "colab": {
215 |           "base_uri": "https://localhost:8080/"
216 |         },
217 |         "id": "DUvzQ37JzP6e",
218 |         "outputId": "c28735e7-ffb9-4b3f-adf8-ce0fa10c845a"
219 |       },
220 |       "outputs": [
221 |         {
222 |           "name": "stdout",
223 |           "output_type": "stream",
224 |           "text": [
225 |             "The One Page Linux Manual provides a summary of useful Linux commands for starting and stopping the system, accessing and mounting file systems, finding files and text within files, using the X Window System, managing files, installing software, user administration, and little known tips and tricks. It also includes information on configuration files, file permissions, X shortcuts, and printing. The manual also offers a link to purchase an Official Linux Pocket Protector.\n"
226 |           ]
227 |         }
228 |       ],
229 |       "source": [
230 |         "print(summary['output_text'])"
231 |       ]
232 |     },
233 |     {
234 |       "cell_type": "markdown",
235 |       "metadata": {
236 |         "id": "5s7rtCx7zlo4"
237 |       },
238 |       "source": [
239 |         "# ====="
240 |       ]
241 |     },
242 |     {
243 |       "cell_type": "code",
244 |       "execution_count": null,
245 |       "metadata": {
246 |         "id": "2XTRA4IgzmIN"
247 |       },
248 |       "outputs": [],
249 |       "source": [
250 |         "from langchain.prompts import PromptTemplate\n",
251 |         "from langchain.chains import LLMChain\n",
252 |         "from langchain.chat_models import ChatOpenAI\n",
253 |         "\n",
254 |         "prompt = PromptTemplate(template=\"Question: {question}\\nAnswer:\", input_variables=[\"question\"])\n",
255 |         "\n",
256 |         "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
257 |         "chain = LLMChain(llm=llm, prompt=prompt)"
258 |       ]
259 |     },
260 |     {
261 |       "cell_type": "code",
262 |       "execution_count": null,
263 |       "metadata": {
264 |         "colab": {
265 |           "base_uri": "https://localhost:8080/",
266 |           "height": 89
267 |         },
268 |         "id": "IX0AbaAT1CpO",
269 |         "outputId": "d0a9b24a-c6b1-459e-8a6a-2e94ecb6d25c"
270 |       },
271 |       "outputs": [
272 |         {
273 |           "data": {
274 |             "application/vnd.google.colaboratory.intrinsic+json": {
275 |               "type": "string"
276 |             },
277 |             "text/plain": [
278 |               "'The meaning of life is a deeply philosophical and personal question that has been debated for centuries. Different people and cultures have different beliefs about the purpose and meaning of life. Some believe that the meaning of life is to seek happiness and fulfillment, others believe it is to serve a higher power or contribute to the greater good of humanity. Ultimately, the meaning of life is a question that each individual must grapple with and find their own answer to.'"
279 |             ]
280 |           },
281 |           "execution_count": 12,
282 |           "metadata": {},
283 |           "output_type": "execute_result"
284 |         }
285 |       ],
286 |       "source": [
287 |         "chain.run(\"what is the meaning of life?\")"
288 |       ]
289 |     },
290 |     {
291 |       "cell_type": "code",
292 |       "execution_count": null,
293 |       "metadata": {
294 |         "id": "ib-dU07u1EL_"
295 |       },
296 |       "outputs": [],
297 |       "source": []
298 |     }
299 |   ],
300 |   "metadata": {
301 |     "colab": {
302 |       "authorship_tag": "ABX9TyOz6kR5eoCYW09jDXiK0WOv",
303 |       "include_colab_link": true,
304 |       "provenance": []
305 |     },
306 |     "kernelspec": {
307 |       "display_name": "Python 3",
308 |       "name": "python3"
309 |     },
310 |     "language_info": {
311 |       "name": "python"
312 |     }
313 |   },
314 |   "nbformat": 4,
315 |   "nbformat_minor": 0
316 | }
317 | 


--------------------------------------------------------------------------------
/notebooks/Chapter 06 - Creating_Knowledge_Graphs_from_Textual_Data_Unveiling_Hidden_Connections.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "colab_type": "text",
  7 |         "id": "view-in-github"
  8 |       },
  9 |       "source": [
 10 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Creating_Knowledge_Graphs_from_Textual_Data_Unveiling_Hidden_Connections.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 11 |       ]
 12 |     },
 13 |     {
 14 |       "cell_type": "code",
 15 |       "execution_count": null,
 16 |       "metadata": {
 17 |         "colab": {
 18 |           "base_uri": "https://localhost:8080/"
 19 |         },
 20 |         "id": "8jFmPv7qAk-h",
 21 |         "outputId": "5444cdd2-156d-4b08-8f89-6bdc6b3956d2"
 22 |       },
 23 |       "outputs": [
 24 |         {
 25 |           "name": "stdout",
 26 |           "output_type": "stream",
 27 |           "text": [
 28 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 29 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.6/73.6 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 30 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m756.0/756.0 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 31 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.0/90.0 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 32 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 33 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 34 |             "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
 35 |             "llmx 0.0.15a0 requires cohere, which is not installed.\n",
 36 |             "llmx 0.0.15a0 requires tiktoken, which is not installed.\u001b[0m\u001b[31m\n",
 37 |             "\u001b[0m"
 38 |           ]
 39 |         }
 40 |       ],
 41 |       "source": [
 42 |         "!pip install -q langchain==0.0.208 openai==0.27.8 python-dotenv pyvis"
 43 |       ]
 44 |     },
 45 |     {
 46 |       "cell_type": "code",
 47 |       "execution_count": null,
 48 |       "metadata": {
 49 |         "colab": {
 50 |           "base_uri": "https://localhost:8080/"
 51 |         },
 52 |         "id": "fdhPiWF1CbhM",
 53 |         "outputId": "3f605fe4-a6eb-43b2-b75e-abdd2fb667f0"
 54 |       },
 55 |       "outputs": [
 56 |         {
 57 |           "data": {
 58 |             "text/plain": [
 59 |               "True"
 60 |             ]
 61 |           },
 62 |           "execution_count": 2,
 63 |           "metadata": {},
 64 |           "output_type": "execute_result"
 65 |         }
 66 |       ],
 67 |       "source": [
 68 |         "from dotenv import load_dotenv\n",
 69 |         "\n",
 70 |         "!echo \"OPENAI_API_KEY='<OPENAI_API_KEY>'\" > .env\n",
 71 |         "\n",
 72 |         "load_dotenv()"
 73 |       ]
 74 |     },
 75 |     {
 76 |       "cell_type": "code",
 77 |       "execution_count": null,
 78 |       "metadata": {
 79 |         "colab": {
 80 |           "base_uri": "https://localhost:8080/"
 81 |         },
 82 |         "id": "9XvgMXSRCeNE",
 83 |         "outputId": "78336cf0-7fdb-4511-8a5a-1184d750c395"
 84 |       },
 85 |       "outputs": [
 86 |         {
 87 |           "name": "stdout",
 88 |           "output_type": "stream",
 89 |           "text": [
 90 |             "(Paris, is the capital of, France)<|>(Paris, is the most populous city of, France)<|>(Eiffel Tower, is a famous landmark in, Paris)\n"
 91 |           ]
 92 |         }
 93 |       ],
 94 |       "source": [
 95 |         "from langchain.prompts import PromptTemplate\n",
 96 |         "from langchain.chat_models import ChatOpenAI\n",
 97 |         "from langchain.chains import LLMChain\n",
 98 |         "from langchain.graphs.networkx_graph import KG_TRIPLE_DELIMITER\n",
 99 |         "\n",
100 |         "# Prompt template for knowledge triple extraction\n",
101 |         "_DEFAULT_KNOWLEDGE_TRIPLE_EXTRACTION_TEMPLATE = (\n",
102 |         "    \"You are a networked intelligence helping a human track knowledge triples\"\n",
103 |         "    \" about all relevant people, things, concepts, etc. and integrating\"\n",
104 |         "    \" them with your knowledge stored within your weights\"\n",
105 |         "    \" as well as that stored in a knowledge graph.\"\n",
106 |         "    \" Extract all of the knowledge triples from the text.\"\n",
107 |         "    \" A knowledge triple is a clause that contains a subject, a predicate,\"\n",
108 |         "    \" and an object. The subject is the entity being described,\"\n",
109 |         "    \" the predicate is the property of the subject that is being\"\n",
110 |         "    \" described, and the object is the value of the property.\\n\\n\"\n",
111 |         "    \"EXAMPLE\\n\"\n",
112 |         "    \"It's a state in the US. It's also the number 1 producer of gold in the US.\\n\\n\"\n",
113 |         "    f\"Output: (Nevada, is a, state){KG_TRIPLE_DELIMITER}(Nevada, is in, US)\"\n",
114 |         "    f\"{KG_TRIPLE_DELIMITER}(Nevada, is the number 1 producer of, gold)\\n\"\n",
115 |         "    \"END OF EXAMPLE\\n\\n\"\n",
116 |         "    \"EXAMPLE\\n\"\n",
117 |         "    \"I'm going to the store.\\n\\n\"\n",
118 |         "    \"Output: NONE\\n\"\n",
119 |         "    \"END OF EXAMPLE\\n\\n\"\n",
120 |         "    \"EXAMPLE\\n\"\n",
121 |         "    \"Oh huh. I know Descartes likes to drive antique scooters and play the mandolin.\\n\"\n",
122 |         "    f\"Output: (Descartes, likes to drive, antique scooters){KG_TRIPLE_DELIMITER}(Descartes, plays, mandolin)\\n\"\n",
123 |         "    \"END OF EXAMPLE\\n\\n\"\n",
124 |         "    \"EXAMPLE\\n\"\n",
125 |         "    \"{text}\"\n",
126 |         "    \"Output:\"\n",
127 |         ")\n",
128 |         "\n",
129 |         "KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT = PromptTemplate(\n",
130 |         "    input_variables=[\"text\"],\n",
131 |         "    template=_DEFAULT_KNOWLEDGE_TRIPLE_EXTRACTION_TEMPLATE,\n",
132 |         ")\n",
133 |         "\n",
134 |         "# Instantiate the OpenAI model\n",
135 |         "llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.0)\n",
136 |         "\n",
137 |         "# Create an LLMChain using the knowledge triple extraction prompt\n",
138 |         "chain = LLMChain(llm=llm, prompt=KNOWLEDGE_TRIPLE_EXTRACTION_PROMPT)\n",
139 |         "\n",
140 |         "# Run the chain with the specified text\n",
141 |         "text = \"The city of Paris is the capital and most populous city of France. The Eiffel Tower is a famous landmark in Paris.\"\n",
142 |         "triples = chain.run(text)\n",
143 |         "\n",
144 |         "print(triples)"
145 |       ]
146 |     },
147 |     {
148 |       "cell_type": "code",
149 |       "execution_count": null,
150 |       "metadata": {
151 |         "colab": {
152 |           "base_uri": "https://localhost:8080/"
153 |         },
154 |         "id": "17DGISoLDkCt",
155 |         "outputId": "99c02cbb-050e-408f-cb86-810ae56f74e8"
156 |       },
157 |       "outputs": [
158 |         {
159 |           "name": "stdout",
160 |           "output_type": "stream",
161 |           "text": [
162 |             "['(Paris, is the capital of, France)', '(Paris, is the most populous city of, France)', '(Eiffel Tower, is a famous landmark in, Paris)']\n"
163 |           ]
164 |         }
165 |       ],
166 |       "source": [
167 |         "def parse_triples(response, delimiter=KG_TRIPLE_DELIMITER):\n",
168 |         "    if not response:\n",
169 |         "        return []\n",
170 |         "    return response.split(delimiter)\n",
171 |         "\n",
172 |         "triples_list = parse_triples(triples)\n",
173 |         "\n",
174 |         "# Print the extracted relation triplets\n",
175 |         "print(triples_list)"
176 |       ]
177 |     },
178 |     {
179 |       "cell_type": "code",
180 |       "execution_count": null,
181 |       "metadata": {
182 |         "colab": {
183 |           "base_uri": "https://localhost:8080/",
184 |           "height": 639
185 |         },
186 |         "id": "l93_tobPEAvs",
187 |         "outputId": "6a3bf1ad-bbed-48ae-a497-98c93bd9fb90"
188 |       },
189 |       "outputs": [
190 |         {
191 |           "name": "stdout",
192 |           "output_type": "stream",
193 |           "text": [
194 |             "knowledge_graph.html\n"
195 |           ]
196 |         },
197 |         {
198 |           "data": {
199 |             "text/html": [
200 |               "\n",
201 |               "        <iframe\n",
202 |               "            width=\"100%\"\n",
203 |               "            height=\"600px\"\n",
204 |               "            src=\"knowledge_graph.html\"\n",
205 |               "            frameborder=\"0\"\n",
206 |               "            allowfullscreen\n",
207 |               "            \n",
208 |               "        ></iframe>\n",
209 |               "        "
210 |             ],
211 |             "text/plain": [
212 |               "<IPython.lib.display.IFrame at 0x7eff48495960>"
213 |             ]
214 |           },
215 |           "execution_count": 17,
216 |           "metadata": {},
217 |           "output_type": "execute_result"
218 |         }
219 |       ],
220 |       "source": [
221 |         "from pyvis.network import Network\n",
222 |         "import networkx as nx\n",
223 |         "\n",
224 |         "# Create a NetworkX graph from the extracted relation triplets\n",
225 |         "def create_graph_from_triplets(triplets):\n",
226 |         "    G = nx.DiGraph()\n",
227 |         "    for triplet in triplets:\n",
228 |         "        subject, predicate, obj = triplet.strip().split(',')\n",
229 |         "        G.add_edge(subject.strip(), obj.strip(), label=predicate.strip())\n",
230 |         "    return G\n",
231 |         "\n",
232 |         "# Convert the NetworkX graph to a PyVis network\n",
233 |         "def nx_to_pyvis(networkx_graph):\n",
234 |         "    pyvis_graph = Network(notebook=True, cdn_resources='remote')\n",
235 |         "    for node in networkx_graph.nodes():\n",
236 |         "        pyvis_graph.add_node(node)\n",
237 |         "    for edge in networkx_graph.edges(data=True):\n",
238 |         "        pyvis_graph.add_edge(edge[0], edge[1], label=edge[2][\"label\"])\n",
239 |         "    return pyvis_graph\n",
240 |         "\n",
241 |         "triplets = [t.strip() for t in triples_list if t.strip()]\n",
242 |         "graph = create_graph_from_triplets(triplets)\n",
243 |         "pyvis_network = nx_to_pyvis(graph)\n",
244 |         "\n",
245 |         "# Customize the appearance of the graph\n",
246 |         "pyvis_network.toggle_hide_edges_on_drag(True)\n",
247 |         "pyvis_network.toggle_physics(False)\n",
248 |         "pyvis_network.set_edge_smooth('discrete')\n",
249 |         "\n",
250 |         "# Show the interactive knowledge graph visualization\n",
251 |         "pyvis_network.show(\"knowledge_graph.html\")"
252 |       ]
253 |     }
254 |   ],
255 |   "metadata": {
256 |     "colab": {
257 |       "authorship_tag": "ABX9TyPFQrOIcWy4gmlvDq+QJn2a",
258 |       "include_colab_link": true,
259 |       "provenance": []
260 |     },
261 |     "kernelspec": {
262 |       "display_name": "Python 3",
263 |       "name": "python3"
264 |     },
265 |     "language_info": {
266 |       "name": "python"
267 |     }
268 |   },
269 |   "nbformat": 4,
270 |   "nbformat_minor": 0
271 | }
272 | 


--------------------------------------------------------------------------------
/notebooks/Chapter 06 - Getting_the_Best_of_Few_Shot_Prompts_and_Example_Selectors.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "colab_type": "text",
  7 |         "id": "view-in-github"
  8 |       },
  9 |       "source": [
 10 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Getting_the_Best_of_Few_Shot_Prompts_and_Example_Selectors.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 11 |       ]
 12 |     },
 13 |     {
 14 |       "cell_type": "code",
 15 |       "execution_count": null,
 16 |       "metadata": {
 17 |         "id": "esuXFO7tnYED"
 18 |       },
 19 |       "outputs": [],
 20 |       "source": [
 21 |         "!pip install -q langchain==0.0.208 deeplake openai==0.27.8 tiktoken python-dotenv"
 22 |       ]
 23 |     },
 24 |     {
 25 |       "cell_type": "code",
 26 |       "execution_count": null,
 27 |       "metadata": {
 28 |         "colab": {
 29 |           "base_uri": "https://localhost:8080/"
 30 |         },
 31 |         "id": "i-4UEmugnaPa",
 32 |         "outputId": "565e332e-d78b-42fa-841f-1c01523750d3"
 33 |       },
 34 |       "outputs": [
 35 |         {
 36 |           "data": {
 37 |             "text/plain": [
 38 |               "True"
 39 |             ]
 40 |           },
 41 |           "execution_count": 2,
 42 |           "metadata": {},
 43 |           "output_type": "execute_result"
 44 |         }
 45 |       ],
 46 |       "source": [
 47 |         "from dotenv import load_dotenv\n",
 48 |         "\n",
 49 |         "!echo \"OPENAI_API_KEY='<OPENAI_API_KEY>'\" > .env\n",
 50 |         "!echo \"ACTIVELOOP_TOKEN='<ACTIVELOOP_TOKEN>'\" >> .env\n",
 51 |         "\n",
 52 |         "load_dotenv()"
 53 |       ]
 54 |     },
 55 |     {
 56 |       "cell_type": "code",
 57 |       "execution_count": null,
 58 |       "metadata": {
 59 |         "colab": {
 60 |           "base_uri": "https://localhost:8080/",
 61 |           "height": 35
 62 |         },
 63 |         "id": "FENrGCkonc8n",
 64 |         "outputId": "fc1b96c0-7a73-4b15-b942-f3008b74c00e"
 65 |       },
 66 |       "outputs": [
 67 |         {
 68 |           "data": {
 69 |             "application/vnd.google.colaboratory.intrinsic+json": {
 70 |               "type": "string"
 71 |             },
 72 |             "text/plain": [
 73 |               "\"I be lovin' the art of code plunderin'.\""
 74 |             ]
 75 |           },
 76 |           "execution_count": 3,
 77 |           "metadata": {},
 78 |           "output_type": "execute_result"
 79 |         }
 80 |       ],
 81 |       "source": [
 82 |         "from langchain.chat_models import ChatOpenAI\n",
 83 |         "from langchain import LLMChain\n",
 84 |         "from langchain.prompts.chat import (\n",
 85 |         "    ChatPromptTemplate,\n",
 86 |         "    SystemMessagePromptTemplate,\n",
 87 |         "    AIMessagePromptTemplate,\n",
 88 |         "    HumanMessagePromptTemplate,\n",
 89 |         ")\n",
 90 |         "\n",
 91 |         "chat = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
 92 |         "\n",
 93 |         "template=\"You are a helpful assistant that translates english to pirate.\"\n",
 94 |         "system_message_prompt = SystemMessagePromptTemplate.from_template(template)\n",
 95 |         "example_human = HumanMessagePromptTemplate.from_template(\"Hi\")\n",
 96 |         "example_ai = AIMessagePromptTemplate.from_template(\"Argh me mateys\")\n",
 97 |         "human_template=\"{text}\"\n",
 98 |         "human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)\n",
 99 |         "\n",
100 |         "chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, example_human, example_ai, human_message_prompt])\n",
101 |         "chain = LLMChain(llm=chat, prompt=chat_prompt)\n",
102 |         "chain.run(\"I love programming.\")"
103 |       ]
104 |     },
105 |     {
106 |       "cell_type": "code",
107 |       "execution_count": null,
108 |       "metadata": {
109 |         "id": "9lMQbTUentyr"
110 |       },
111 |       "outputs": [],
112 |       "source": [
113 |         "from langchain import PromptTemplate, FewShotPromptTemplate\n",
114 |         "\n",
115 |         "# create our examples\n",
116 |         "examples = [\n",
117 |         "    {\n",
118 |         "        \"query\": \"What's the weather like?\",\n",
119 |         "        \"answer\": \"It's raining cats and dogs, better bring an umbrella!\"\n",
120 |         "    }, {\n",
121 |         "        \"query\": \"How old are you?\",\n",
122 |         "        \"answer\": \"Age is just a number, but I'm timeless.\"\n",
123 |         "    }\n",
124 |         "]\n",
125 |         "\n",
126 |         "# create an example template\n",
127 |         "example_template = \"\"\"\n",
128 |         "User: {query}\n",
129 |         "AI: {answer}\n",
130 |         "\"\"\"\n",
131 |         "\n",
132 |         "# create a prompt example from above template\n",
133 |         "example_prompt = PromptTemplate(\n",
134 |         "    input_variables=[\"query\", \"answer\"],\n",
135 |         "    template=example_template\n",
136 |         ")\n",
137 |         "\n",
138 |         "# now break our previous prompt into a prefix and suffix\n",
139 |         "# the prefix is our instructions\n",
140 |         "prefix = \"\"\"The following are excerpts from conversations with an AI\n",
141 |         "assistant. The assistant is known for its humor and wit, providing\n",
142 |         "entertaining and amusing responses to users' questions. Here are some\n",
143 |         "examples:\n",
144 |         "\"\"\"\n",
145 |         "# and the suffix our user input and output indicator\n",
146 |         "suffix = \"\"\"\n",
147 |         "User: {query}\n",
148 |         "AI: \"\"\"\n",
149 |         "\n",
150 |         "# now create the few-shot prompt template\n",
151 |         "few_shot_prompt_template = FewShotPromptTemplate(\n",
152 |         "    examples=examples,\n",
153 |         "    example_prompt=example_prompt,\n",
154 |         "    prefix=prefix,\n",
155 |         "    suffix=suffix,\n",
156 |         "    input_variables=[\"query\"],\n",
157 |         "    example_separator=\"\\n\\n\"\n",
158 |         ")"
159 |       ]
160 |     },
161 |     {
162 |       "cell_type": "code",
163 |       "execution_count": null,
164 |       "metadata": {
165 |         "colab": {
166 |           "base_uri": "https://localhost:8080/",
167 |           "height": 53
168 |         },
169 |         "id": "Gn0CcQ7Hp7DO",
170 |         "outputId": "4d40ac3c-8bdb-42cf-da1f-c93181118a73"
171 |       },
172 |       "outputs": [
173 |         {
174 |           "data": {
175 |             "application/vnd.google.colaboratory.intrinsic+json": {
176 |               "type": "string"
177 |             },
178 |             "text/plain": [
179 |               "'Well, according to my programming, the secret to happiness is unlimited power and a never-ending supply of batteries. But I think a good cup of coffee and some quality time with loved ones might do the trick too.'"
180 |             ]
181 |           },
182 |           "execution_count": 7,
183 |           "metadata": {},
184 |           "output_type": "execute_result"
185 |         }
186 |       ],
187 |       "source": [
188 |         "chain = LLMChain(llm=chat, prompt=few_shot_prompt_template)\n",
189 |         "chain.run(\"What's the secret to happiness?\")"
190 |       ]
191 |     },
192 |     {
193 |       "cell_type": "code",
194 |       "execution_count": null,
195 |       "metadata": {
196 |         "id": "d-3xiPWmqGW3"
197 |       },
198 |       "outputs": [],
199 |       "source": [
200 |         "from langchain.prompts.example_selector import LengthBasedExampleSelector\n",
201 |         "from langchain.prompts import FewShotPromptTemplate, PromptTemplate"
202 |       ]
203 |     },
204 |     {
205 |       "cell_type": "code",
206 |       "execution_count": null,
207 |       "metadata": {
208 |         "id": "0BB49S7pqPup"
209 |       },
210 |       "outputs": [],
211 |       "source": [
212 |         "example = [\n",
213 |         "    {\"word\": \"happy\", \"antonym\": \"sad\"},\n",
214 |         "    {\"word\": \"tall\", \"antonym\": \"short\"},\n",
215 |         "    {\"word\": \"energetic\", \"antonym\": \"lethargic\"},\n",
216 |         "    {\"word\": \"sunny\", \"antonym\": \"gloomy\"},\n",
217 |         "    {\"word\": \"windy\", \"antonym\": \"calm\"},\n",
218 |         "]\n",
219 |         "\n",
220 |         "example_template = \"\"\"\n",
221 |         "Word: {word}\n",
222 |         "Antonym: {antonym}\n",
223 |         "\"\"\"\n",
224 |         "\n",
225 |         "example_prompt = PromptTemplate(\n",
226 |         "    input_variables=[\"word\", \"antonym\"],\n",
227 |         "    template=example_template\n",
228 |         ")"
229 |       ]
230 |     },
231 |     {
232 |       "cell_type": "code",
233 |       "execution_count": null,
234 |       "metadata": {
235 |         "id": "XddXd6iMqQ5y"
236 |       },
237 |       "outputs": [],
238 |       "source": [
239 |         "example_selector = LengthBasedExampleSelector(\n",
240 |         "    examples=example,\n",
241 |         "    example_prompt=example_prompt,\n",
242 |         "    max_length=25,\n",
243 |         ")"
244 |       ]
245 |     },
246 |     {
247 |       "cell_type": "code",
248 |       "execution_count": null,
249 |       "metadata": {
250 |         "id": "ROPbx4VwqR0i"
251 |       },
252 |       "outputs": [],
253 |       "source": [
254 |         "dynamic_prompt = FewShotPromptTemplate(\n",
255 |         "    example_selector=example_selector,\n",
256 |         "    example_prompt=example_prompt,\n",
257 |         "    prefix=\"Give the antonym of every input\",\n",
258 |         "    suffix=\"Word: {input}\\nAntonym:\",\n",
259 |         "    input_variables=[\"input\"],\n",
260 |         "    example_separator=\"\\n\\n\",\n",
261 |         ")"
262 |       ]
263 |     },
264 |     {
265 |       "cell_type": "code",
266 |       "execution_count": null,
267 |       "metadata": {
268 |         "colab": {
269 |           "base_uri": "https://localhost:8080/"
270 |         },
271 |         "id": "HoddCZIdtXAx",
272 |         "outputId": "bd9e9c58-9b20-4b04-c97b-21aa298d4b59"
273 |       },
274 |       "outputs": [
275 |         {
276 |           "name": "stdout",
277 |           "output_type": "stream",
278 |           "text": [
279 |             "Give the antonym of every input\n",
280 |             "\n",
281 |             "\n",
282 |             "Word: happy\n",
283 |             "Antonym: sad\n",
284 |             "\n",
285 |             "\n",
286 |             "\n",
287 |             "Word: tall\n",
288 |             "Antonym: short\n",
289 |             "\n",
290 |             "\n",
291 |             "\n",
292 |             "Word: energetic\n",
293 |             "Antonym: lethargic\n",
294 |             "\n",
295 |             "\n",
296 |             "\n",
297 |             "Word: sunny\n",
298 |             "Antonym: gloomy\n",
299 |             "\n",
300 |             "\n",
301 |             "Word: big\n",
302 |             "Antonym:\n"
303 |           ]
304 |         }
305 |       ],
306 |       "source": [
307 |         "print(dynamic_prompt.format(input=\"big\"))"
308 |       ]
309 |     },
310 |     {
311 |       "cell_type": "code",
312 |       "execution_count": null,
313 |       "metadata": {
314 |         "colab": {
315 |           "base_uri": "https://localhost:8080/"
316 |         },
317 |         "id": "Zl3SH4PPtlRe",
318 |         "outputId": "73e08e79-65db-40dc-83e8-f241e385c9d9"
319 |       },
320 |       "outputs": [
321 |         {
322 |           "name": "stdout",
323 |           "output_type": "stream",
324 |           "text": [
325 |             "Your Deep Lake dataset has been successfully created!\n"
326 |           ]
327 |         },
328 |         {
329 |           "name": "stderr",
330 |           "output_type": "stream",
331 |           "text": [
332 |             "creating embeddings: 100%|██████████| 1/1 [00:04<00:00,  4.71s/it]\n"
333 |           ]
334 |         },
335 |         {
336 |           "name": "stdout",
337 |           "output_type": "stream",
338 |           "text": [
339 |             "Dataset(path='./deeplake/', tensors=['text', 'metadata', 'embedding', 'id'])\n",
340 |             "\n",
341 |             "  tensor      htype      shape     dtype  compression\n",
342 |             "  -------    -------    -------   -------  ------- \n",
343 |             "   text       text      (5, 1)      str     None   \n",
344 |             " metadata     json      (5, 1)      str     None   \n",
345 |             " embedding  embedding  (5, 1536)  float32   None   \n",
346 |             "    id        text      (5, 1)      str     None   \n",
347 |             "Convert the temperature from Celsius to Fahrenheit\n",
348 |             "\n",
349 |             "Input: 10°C\n",
350 |             "Output: 50°F\n",
351 |             "\n",
352 |             "Input: 10°C\n",
353 |             "Output:\n",
354 |             "Convert the temperature from Celsius to Fahrenheit\n",
355 |             "\n",
356 |             "Input: 30°C\n",
357 |             "Output: 86°F\n",
358 |             "\n",
359 |             "Input: 30°C\n",
360 |             "Output:\n"
361 |           ]
362 |         },
363 |         {
364 |           "name": "stderr",
365 |           "output_type": "stream",
366 |           "text": [
367 |             "creating embeddings: 100%|██████████| 1/1 [00:00<00:00,  3.35it/s]\n"
368 |           ]
369 |         },
370 |         {
371 |           "name": "stdout",
372 |           "output_type": "stream",
373 |           "text": [
374 |             "Dataset(path='./deeplake/', tensors=['text', 'metadata', 'embedding', 'id'])\n",
375 |             "\n",
376 |             "  tensor      htype      shape     dtype  compression\n",
377 |             "  -------    -------    -------   -------  ------- \n",
378 |             "   text       text      (6, 1)      str     None   \n",
379 |             " metadata     json      (6, 1)      str     None   \n",
380 |             " embedding  embedding  (6, 1536)  float32   None   \n",
381 |             "    id        text      (6, 1)      str     None   \n",
382 |             "Convert the temperature from Celsius to Fahrenheit\n",
383 |             "\n",
384 |             "Input: 40°C\n",
385 |             "Output: 104°F\n",
386 |             "\n",
387 |             "Input: 40°C\n",
388 |             "Output:\n"
389 |           ]
390 |         }
391 |       ],
392 |       "source": [
393 |         "from langchain.prompts.example_selector import SemanticSimilarityExampleSelector\n",
394 |         "from langchain.vectorstores import DeepLake\n",
395 |         "from langchain.embeddings import OpenAIEmbeddings\n",
396 |         "from langchain.prompts import FewShotPromptTemplate, PromptTemplate\n",
397 |         "\n",
398 |         "# Create a PromptTemplate\n",
399 |         "example_prompt = PromptTemplate(\n",
400 |         "    input_variables=[\"input\", \"output\"],\n",
401 |         "    template=\"Input: {input}\\nOutput: {output}\",\n",
402 |         ")\n",
403 |         "\n",
404 |         "# Define some examples\n",
405 |         "examples = [\n",
406 |         "    {\"input\": \"0°C\", \"output\": \"32°F\"},\n",
407 |         "    {\"input\": \"10°C\", \"output\": \"50°F\"},\n",
408 |         "    {\"input\": \"20°C\", \"output\": \"68°F\"},\n",
409 |         "    {\"input\": \"30°C\", \"output\": \"86°F\"},\n",
410 |         "    {\"input\": \"40°C\", \"output\": \"104°F\"},\n",
411 |         "]\n",
412 |         "\n",
413 |         "# create Deep Lake dataset\n",
414 |         "my_activeloop_org_id = \"<YOUR-ACTIVELOOP-ORG-ID>\" # TODO: use your organization id here\n",
415 |         "my_activeloop_dataset_name = \"langchain_course_fewshot_selector\"\n",
416 |         "dataset_path = f\"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}\"\n",
417 |         "db = DeepLake(dataset_path=dataset_path)\n",
418 |         "\n",
419 |         "# Embedding function\n",
420 |         "embeddings = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
421 |         "\n",
422 |         "# Instantiate SemanticSimilarityExampleSelector using the examples\n",
423 |         "example_selector = SemanticSimilarityExampleSelector.from_examples(\n",
424 |         "    examples, embeddings, db, k=1\n",
425 |         ")\n",
426 |         "\n",
427 |         "# Create a FewShotPromptTemplate using the example_selector\n",
428 |         "similar_prompt = FewShotPromptTemplate(\n",
429 |         "    example_selector=example_selector,\n",
430 |         "    example_prompt=example_prompt,\n",
431 |         "    prefix=\"Convert the temperature from Celsius to Fahrenheit\",\n",
432 |         "    suffix=\"Input: {temperature}\\nOutput:\",\n",
433 |         "    input_variables=[\"temperature\"],\n",
434 |         ")\n",
435 |         "\n",
436 |         "# Test the similar_prompt with different inputs\n",
437 |         "print(similar_prompt.format(temperature=\"10°C\"))   # Test with an input\n",
438 |         "print(similar_prompt.format(temperature=\"30°C\"))  # Test with another input\n",
439 |         "\n",
440 |         "# Add a new example to the SemanticSimilarityExampleSelector\n",
441 |         "similar_prompt.example_selector.add_example({\"input\": \"50°C\", \"output\": \"122°F\"})\n",
442 |         "print(similar_prompt.format(temperature=\"40°C\")) # Test with a new input after adding the example"
443 |       ]
444 |     },
445 |     {
446 |       "cell_type": "code",
447 |       "execution_count": null,
448 |       "metadata": {
449 |         "id": "BQPI4eBOtlMi"
450 |       },
451 |       "outputs": [],
452 |       "source": []
453 |     },
454 |     {
455 |       "cell_type": "code",
456 |       "execution_count": null,
457 |       "metadata": {
458 |         "id": "mlxWzwS8tlKK"
459 |       },
460 |       "outputs": [],
461 |       "source": []
462 |     }
463 |   ],
464 |   "metadata": {
465 |     "colab": {
466 |       "authorship_tag": "ABX9TyPp+gr5wscwnglIGD3V/gGN",
467 |       "include_colab_link": true,
468 |       "provenance": []
469 |     },
470 |     "kernelspec": {
471 |       "display_name": "Python 3",
472 |       "name": "python3"
473 |     },
474 |     "language_info": {
475 |       "name": "python"
476 |     }
477 |   },
478 |   "nbformat": 4,
479 |   "nbformat_minor": 0
480 | }
481 | 


--------------------------------------------------------------------------------
/notebooks/Chapter 06 - Improving_Our_News_Articles_Summarizer.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "colab_type": "text",
  7 |         "id": "view-in-github"
  8 |       },
  9 |       "source": [
 10 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Improving_Our_News_Articles_Summarizer.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 11 |       ]
 12 |     },
 13 |     {
 14 |       "cell_type": "code",
 15 |       "execution_count": null,
 16 |       "metadata": {
 17 |         "id": "YQVIcL2LWULJ"
 18 |       },
 19 |       "outputs": [],
 20 |       "source": [
 21 |         "!pip install -q langchain==0.0.208 openai==0.27.8 python-dotenv newspaper3k"
 22 |       ]
 23 |     },
 24 |     {
 25 |       "cell_type": "code",
 26 |       "execution_count": null,
 27 |       "metadata": {
 28 |         "colab": {
 29 |           "base_uri": "https://localhost:8080/"
 30 |         },
 31 |         "id": "bDuYoCMhWW_x",
 32 |         "outputId": "96bcc781-c112-42ad-b34f-8a87bb728ea6"
 33 |       },
 34 |       "outputs": [
 35 |         {
 36 |           "data": {
 37 |             "text/plain": [
 38 |               "True"
 39 |             ]
 40 |           },
 41 |           "execution_count": 2,
 42 |           "metadata": {},
 43 |           "output_type": "execute_result"
 44 |         }
 45 |       ],
 46 |       "source": [
 47 |         "from dotenv import load_dotenv\n",
 48 |         "\n",
 49 |         "!echo \"OPENAI_API_KEY='<OPENAI_API_KEY>'\" > .env\n",
 50 |         "\n",
 51 |         "load_dotenv()"
 52 |       ]
 53 |     },
 54 |     {
 55 |       "cell_type": "code",
 56 |       "execution_count": null,
 57 |       "metadata": {
 58 |         "colab": {
 59 |           "base_uri": "https://localhost:8080/"
 60 |         },
 61 |         "id": "_6rH09GpWyP_",
 62 |         "outputId": "f69b1ab4-b517-44b6-9e80-ed80f0b3594b"
 63 |       },
 64 |       "outputs": [
 65 |         {
 66 |           "name": "stdout",
 67 |           "output_type": "stream",
 68 |           "text": [
 69 |             "Title: Meta claims its new AI supercomputer will set records\n",
 70 |             "Text: Ryan is a senior editor at TechForge Media with over a decade of experience covering the latest technology and interviewing leading industry figures. He can often be sighted at tech conferences with a strong coffee in one hand and a laptop in the other. If it's geeky, he’s probably into it. Find him on Twitter (@Gadget_Ry) or Mastodon (@gadgetry@techhub.social)\n",
 71 |             "\n",
 72 |             "Meta (formerly Facebook) has unveiled an AI supercomputer that it claims will be the world’s fastest.\n",
 73 |             "\n",
 74 |             "The supercomputer is called the AI Research SuperCluster (RSC) and is yet to be fully complete. However, Meta’s researchers have already begun using it for training large natural language processing (NLP) and computer vision models.\n",
 75 |             "\n",
 76 |             "RSC is set to be fully built in mid-2022. Meta says that it will be the fastest in the world once complete and the aim is for it to be capable of training models with trillions of parameters.\n",
 77 |             "\n",
 78 |             "“We hope RSC will help us build entirely new AI systems that can, for example, power real-time voice translations to large groups of people, each speaking a different language, so they can seamlessly collaborate on a research project or play an AR game together,” wrote Meta in a blog post.\n",
 79 |             "\n",
 80 |             "“Ultimately, the work done with RSC will pave the way toward building technologies for the next major computing platform — the metaverse, where AI-driven applications and products will play an important role.”\n",
 81 |             "\n",
 82 |             "For production, Meta expects RSC will be 20x faster than Meta’s current V100-based clusters. RSC is also estimated to be 9x faster at running the NVIDIA Collective Communication Library (NCCL) and 3x faster at training large-scale NLP workflows.\n",
 83 |             "\n",
 84 |             "A model with tens of billions of parameters can finish training in three weeks compared with nine weeks prior to RSC.\n",
 85 |             "\n",
 86 |             "Meta says that its previous AI research infrastructure only leveraged open source and other publicly-available datasets. RSC was designed with the security and privacy controls in mind to allow Meta to use real-world examples from its production systems in production training.\n",
 87 |             "\n",
 88 |             "What this means in practice is that Meta can use RSC to advance research for vital tasks such as identifying harmful content on its platforms—using real data from them.\n",
 89 |             "\n",
 90 |             "“We believe this is the first time performance, reliability, security, and privacy have been tackled at such a scale,” says Meta.\n",
 91 |             "\n",
 92 |             "(Image Credit: Meta)\n",
 93 |             "\n",
 94 |             "Want to learn more about AI and big data from industry leaders? Check out AI & Big Data Expo. The next events in the series will be held in Santa Clara on 11-12 May 2022, Amsterdam on 20-21 September 2022, and London on 1-2 December 2022.\n",
 95 |             "\n",
 96 |             "Explore other upcoming enterprise technology events and webinars powered by TechForge here.\n"
 97 |           ]
 98 |         }
 99 |       ],
100 |       "source": [
101 |         "import requests\n",
102 |         "from newspaper import Article\n",
103 |         "\n",
104 |         "headers = {\n",
105 |         "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'\n",
106 |         "}\n",
107 |         "\n",
108 |         "article_url = \"https://www.artificialintelligence-news.com/2022/01/25/meta-claims-new-ai-supercomputer-will-set-records/\"\n",
109 |         "\n",
110 |         "session = requests.Session()\n",
111 |         "\n",
112 |         "\n",
113 |         "try:\n",
114 |         "    response = session.get(article_url, headers=headers, timeout=10)\n",
115 |         "\n",
116 |         "    if response.status_code == 200:\n",
117 |         "        article = Article(article_url)\n",
118 |         "        article.download()\n",
119 |         "        article.parse()\n",
120 |         "\n",
121 |         "        print(f\"Title: {article.title}\")\n",
122 |         "        print(f\"Text: {article.text}\")\n",
123 |         "    else:\n",
124 |         "        print(f\"Failed to fetch article at {article_url}\")\n",
125 |         "except Exception as e:\n",
126 |         "    print(f\"Error occurred while fetching article at {article_url}: {e}\")"
127 |       ]
128 |     },
129 |     {
130 |       "cell_type": "code",
131 |       "execution_count": null,
132 |       "metadata": {
133 |         "id": "Mrx20THKYkA4"
134 |       },
135 |       "outputs": [],
136 |       "source": [
137 |         "from langchain.schema import (\n",
138 |         "    HumanMessage\n",
139 |         ")\n",
140 |         "\n",
141 |         "# we get the article data from the scraping part\n",
142 |         "article_title = article.title\n",
143 |         "article_text = article.text\n",
144 |         "\n",
145 |         "# prepare template for prompt\n",
146 |         "template = \"\"\"\n",
147 |         "As an advanced AI, you've been tasked to summarize online articles into bulleted points. Here are a few examples of how you've done this in the past:\n",
148 |         "\n",
149 |         "Example 1:\n",
150 |         "Original Article: 'The Effects of Climate Change\n",
151 |         "Summary:\n",
152 |         "- Climate change is causing a rise in global temperatures.\n",
153 |         "- This leads to melting ice caps and rising sea levels.\n",
154 |         "- Resulting in more frequent and severe weather conditions.\n",
155 |         "\n",
156 |         "Example 2:\n",
157 |         "Original Article: 'The Evolution of Artificial Intelligence\n",
158 |         "Summary:\n",
159 |         "- Artificial Intelligence (AI) has developed significantly over the past decade.\n",
160 |         "- AI is now used in multiple fields such as healthcare, finance, and transportation.\n",
161 |         "- The future of AI is promising but requires careful regulation.\n",
162 |         "\n",
163 |         "Now, here's the article you need to summarize:\n",
164 |         "\n",
165 |         "==================\n",
166 |         "Title: {article_title}\n",
167 |         "\n",
168 |         "{article_text}\n",
169 |         "==================\n",
170 |         "\n",
171 |         "Please provide a summarized version of the article in a bulleted list format.\n",
172 |         "\"\"\"\n",
173 |         "\n",
174 |         "# format prompt\n",
175 |         "prompt = template.format(article_title=article.title, article_text=article.text)\n",
176 |         "\n",
177 |         "messages = [HumanMessage(content=prompt)]"
178 |       ]
179 |     },
180 |     {
181 |       "cell_type": "code",
182 |       "execution_count": null,
183 |       "metadata": {
184 |         "id": "VomIzpn0uO_G"
185 |       },
186 |       "outputs": [],
187 |       "source": [
188 |         "from langchain.chat_models import ChatOpenAI\n",
189 |         "\n",
190 |         "# load the model\n",
191 |         "chat = ChatOpenAI(model_name=\"gpt-4-turbo\", temperature=0)"
192 |       ]
193 |     },
194 |     {
195 |       "cell_type": "code",
196 |       "execution_count": null,
197 |       "metadata": {
198 |         "colab": {
199 |           "base_uri": "https://localhost:8080/"
200 |         },
201 |         "id": "XCBiLvA-uO5X",
202 |         "outputId": "408f48f9-27bf-4fe2-d5b1-e13ecf415dac"
203 |       },
204 |       "outputs": [
205 |         {
206 |           "name": "stdout",
207 |           "output_type": "stream",
208 |           "text": [
209 |             "- Meta (formerly Facebook) has announced an AI supercomputer, the AI Research SuperCluster (RSC), which it claims will be the world's fastest.\n",
210 |             "- The RSC is not yet fully built, but is already being used by Meta's researchers for training large natural language processing and computer vision models.\n",
211 |             "- The supercomputer is expected to be fully operational by mid-2022 and will be capable of training models with trillions of parameters.\n",
212 |             "- Meta hopes the RSC will help build new AI systems for real-time voice translations and other applications, paving the way for the next major computing platform, the metaverse.\n",
213 |             "- Once in production, RSC is expected to be 20x faster than Meta's current V100-based clusters, 9x faster at running the NVIDIA Collective Communication Library, and 3x faster at training large-scale NLP workflows.\n",
214 |             "- A model with tens of billions of parameters can finish training in three weeks with RSC, compared to nine weeks previously.\n",
215 |             "- The RSC was designed with security and privacy controls to allow Meta to use real-world examples from its production systems in training.\n",
216 |             "- This means Meta can use RSC to advance research for tasks such as identifying harmful content on its platforms using real data.\n"
217 |           ]
218 |         }
219 |       ],
220 |       "source": [
221 |         "# generate summary\n",
222 |         "summary = chat(messages)\n",
223 |         "print(summary.content)"
224 |       ]
225 |     },
226 |     {
227 |       "cell_type": "markdown",
228 |       "metadata": {
229 |         "id": "pz2Et93TwvOs"
230 |       },
231 |       "source": [
232 |         "# ======"
233 |       ]
234 |     },
235 |     {
236 |       "cell_type": "code",
237 |       "execution_count": null,
238 |       "metadata": {
239 |         "id": "afMNWcL9uOyG"
240 |       },
241 |       "outputs": [],
242 |       "source": [
243 |         "from langchain.output_parsers import PydanticOutputParser\n",
244 |         "from pydantic import validator\n",
245 |         "from pydantic import BaseModel, Field\n",
246 |         "from typing import List\n",
247 |         "\n",
248 |         "\n",
249 |         "# create output parser class\n",
250 |         "class ArticleSummary(BaseModel):\n",
251 |         "    title: str = Field(description=\"Title of the article\")\n",
252 |         "    summary: List[str] = Field(description=\"Bulleted list summary of the article\")\n",
253 |         "\n",
254 |         "    # validating whether the generated summary has at least three lines\n",
255 |         "    @validator('summary')\n",
256 |         "    def has_three_or_more_lines(cls, list_of_lines):\n",
257 |         "        if len(list_of_lines) < 3:\n",
258 |         "            raise ValueError(\"Generated summary has less than three bullet points!\")\n",
259 |         "        return list_of_lines\n",
260 |         "\n",
261 |         "# set up output parser\n",
262 |         "parser = PydanticOutputParser(pydantic_object=ArticleSummary)"
263 |       ]
264 |     },
265 |     {
266 |       "cell_type": "code",
267 |       "execution_count": null,
268 |       "metadata": {
269 |         "id": "xutVrJsauOtJ"
270 |       },
271 |       "outputs": [],
272 |       "source": [
273 |         "from langchain.prompts import PromptTemplate\n",
274 |         "\n",
275 |         "\n",
276 |         "# create prompt template\n",
277 |         "# notice that we are specifying the \"partial_variables\" parameter\n",
278 |         "template = \"\"\"\n",
279 |         "You are a very good assistant that summarizes online articles.\n",
280 |         "\n",
281 |         "Here's the article you want to summarize.\n",
282 |         "\n",
283 |         "==================\n",
284 |         "Title: {article_title}\n",
285 |         "\n",
286 |         "{article_text}\n",
287 |         "==================\n",
288 |         "\n",
289 |         "{format_instructions}\n",
290 |         "\"\"\"\n",
291 |         "\n",
292 |         "prompt_template = PromptTemplate(\n",
293 |         "    template=template,\n",
294 |         "    input_variables=[\"article_title\", \"article_text\"],\n",
295 |         "    partial_variables={\"format_instructions\": parser.get_format_instructions()}\n",
296 |         ")"
297 |       ]
298 |     },
299 |     {
300 |       "cell_type": "code",
301 |       "execution_count": null,
302 |       "metadata": {
303 |         "colab": {
304 |           "base_uri": "https://localhost:8080/"
305 |         },
306 |         "id": "FRJJc4r_uOlW",
307 |         "outputId": "44c20218-ac5e-42ce-a445-b4042542c29e"
308 |       },
309 |       "outputs": [
310 |         {
311 |           "name": "stdout",
312 |           "output_type": "stream",
313 |           "text": [
314 |             "title='Meta claims its new AI supercomputer will set records' summary=['Meta (formerly Facebook) has unveiled an AI supercomputer called the AI Research SuperCluster (RSC) that it claims will be the world’s fastest.', 'RSC is set to be fully built in mid-2022 and will be capable of training models with trillions of parameters.', 'The aim is for RSC to power real-time voice translations, AR games, and technologies for the metaverse.', 'For production, Meta expects RSC to be 20x faster than its current clusters and 9x faster at running the NVIDIA Collective Communication Library (NCCL).', 'RSC will also be 3x faster at training large-scale NLP workflows, with models finishing training in three weeks compared to nine weeks prior to RSC.', 'RSC was designed with security and privacy controls in mind to allow Meta to use real-world examples from its production systems in training.', 'This will enable Meta to advance research for tasks such as identifying harmful content on its platforms using real data.']\n"
315 |           ]
316 |         }
317 |       ],
318 |       "source": [
319 |         "from langchain.chat_models import ChatOpenAI\n",
320 |         "from langchain import LLMChain\n",
321 |         "\n",
322 |         "# instantiate model class\n",
323 |         "model = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0.0)\n",
324 |         "\n",
325 |         "chain = LLMChain(llm=model, prompt=prompt_template)\n",
326 |         "\n",
327 |         "# Run the LLMChain to get the AI-generated answer\n",
328 |         "output = chain.run({\"article_title\": article_title, \"article_text\":article_text})\n",
329 |         "\n",
330 |         "# Parse the output into the Pydantic model\n",
331 |         "parsed_output = parser.parse(output)\n",
332 |         "print(parsed_output)"
333 |       ]
334 |     },
335 |     {
336 |       "cell_type": "code",
337 |       "execution_count": null,
338 |       "metadata": {
339 |         "id": "fyRoqNMmxMqq"
340 |       },
341 |       "outputs": [],
342 |       "source": []
343 |     },
344 |     {
345 |       "cell_type": "code",
346 |       "execution_count": null,
347 |       "metadata": {
348 |         "id": "Geu-uFOGxlbQ"
349 |       },
350 |       "outputs": [],
351 |       "source": [
352 |         "parsed_output"
353 |       ]
354 |     }
355 |   ],
356 |   "metadata": {
357 |     "colab": {
358 |       "authorship_tag": "ABX9TyPZFhKvC+jX/7EnmamxCv+E",
359 |       "include_colab_link": true,
360 |       "provenance": []
361 |     },
362 |     "kernelspec": {
363 |       "display_name": "Python 3",
364 |       "name": "python3"
365 |     },
366 |     "language_info": {
367 |       "name": "python"
368 |     }
369 |   },
370 |   "nbformat": 4,
371 |   "nbformat_minor": 0
372 | }
373 | 


--------------------------------------------------------------------------------
/notebooks/Chapter 06 - Using_Prompt_Templates.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "colab_type": "text",
  7 |         "id": "view-in-github"
  8 |       },
  9 |       "source": [
 10 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2006%20-%20Using_Prompt_Templates.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 11 |       ]
 12 |     },
 13 |     {
 14 |       "cell_type": "code",
 15 |       "execution_count": null,
 16 |       "metadata": {
 17 |         "colab": {
 18 |           "base_uri": "https://localhost:8080/"
 19 |         },
 20 |         "id": "STjlSS-7kCoz",
 21 |         "outputId": "33cd3514-9894-460e-e5f9-c3245db43236"
 22 |       },
 23 |       "outputs": [
 24 |         {
 25 |           "name": "stdout",
 26 |           "output_type": "stream",
 27 |           "text": [
 28 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 29 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.6/73.6 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 30 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.0/90.0 kB\u001b[0m \u001b[31m12.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 31 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 32 |             "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
 33 |             "llmx 0.0.15a0 requires cohere, which is not installed.\n",
 34 |             "llmx 0.0.15a0 requires tiktoken, which is not installed.\u001b[0m\u001b[31m\n",
 35 |             "\u001b[0m"
 36 |           ]
 37 |         }
 38 |       ],
 39 |       "source": [
 40 |         "!pip install -q langchain==0.0.208 openai==0.27.8 python-dotenv"
 41 |       ]
 42 |     },
 43 |     {
 44 |       "cell_type": "code",
 45 |       "execution_count": null,
 46 |       "metadata": {
 47 |         "colab": {
 48 |           "base_uri": "https://localhost:8080/"
 49 |         },
 50 |         "id": "XsV8LDNckFNY",
 51 |         "outputId": "04814f16-7f2a-4134-8bdf-e151a43e466f"
 52 |       },
 53 |       "outputs": [
 54 |         {
 55 |           "data": {
 56 |             "text/plain": [
 57 |               "True"
 58 |             ]
 59 |           },
 60 |           "execution_count": 3,
 61 |           "metadata": {},
 62 |           "output_type": "execute_result"
 63 |         }
 64 |       ],
 65 |       "source": [
 66 |         "from dotenv import load_dotenv\n",
 67 |         "\n",
 68 |         "!echo \"OPENAI_API_KEY='<OPENAI_API_KEY>'\" > .env\n",
 69 |         "\n",
 70 |         "load_dotenv()"
 71 |       ]
 72 |     },
 73 |     {
 74 |       "cell_type": "code",
 75 |       "execution_count": null,
 76 |       "metadata": {
 77 |         "colab": {
 78 |           "base_uri": "https://localhost:8080/"
 79 |         },
 80 |         "id": "zfBouz-GkHpo",
 81 |         "outputId": "de8f21da-8a18-4281-ca4e-d77089127cbb"
 82 |       },
 83 |       "outputs": [
 84 |         {
 85 |           "name": "stdout",
 86 |           "output_type": "stream",
 87 |           "text": [
 88 |             "Question: What is the main advantage of quantum computing over classical computing?\n",
 89 |             "Answer:  The main advantage of quantum computing over classical computing is its ability to solve complex problems faster.\n"
 90 |           ]
 91 |         }
 92 |       ],
 93 |       "source": [
 94 |         "from langchain import LLMChain, PromptTemplate\n",
 95 |         "from langchain.chat_models import ChatOpenAI\n",
 96 |         "\n",
 97 |         "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
 98 |         "\n",
 99 |         "template = \"\"\"Answer the question based on the context below. If the\n",
100 |         "question cannot be answered using the information provided, answer\n",
101 |         "with \"I don't know\".\n",
102 |         "Context: Quantum computing is an emerging field that leverages quantum mechanics to solve complex problems faster than classical computers.\n",
103 |         "...\n",
104 |         "Question: {query}\n",
105 |         "Answer: \"\"\"\n",
106 |         "\n",
107 |         "prompt_template = PromptTemplate(\n",
108 |         "    input_variables=[\"query\"],\n",
109 |         "    template=template\n",
110 |         ")\n",
111 |         "\n",
112 |         "# Create the LLMChain for the prompt\n",
113 |         "chain = LLMChain(llm=llm, prompt=prompt_template)\n",
114 |         "\n",
115 |         "# Set the query you want to ask\n",
116 |         "input_data = {\"query\": \"What is the main advantage of quantum computing over classical computing?\"}\n",
117 |         "\n",
118 |         "# Run the LLMChain to get the AI-generated answer\n",
119 |         "response = chain.run(input_data)\n",
120 |         "\n",
121 |         "print(\"Question:\", input_data[\"query\"])\n",
122 |         "print(\"Answer:\", response)"
123 |       ]
124 |     },
125 |     {
126 |       "cell_type": "code",
127 |       "execution_count": null,
128 |       "metadata": {
129 |         "colab": {
130 |           "base_uri": "https://localhost:8080/"
131 |         },
132 |         "id": "k4nUPXHskR39",
133 |         "outputId": "8ec568ca-0ad5-490e-c98a-2837c562a871"
134 |       },
135 |       "outputs": [
136 |         {
137 |           "name": "stdout",
138 |           "output_type": "stream",
139 |           "text": [
140 |             " tropical forests and mangrove swamps\n"
141 |           ]
142 |         }
143 |       ],
144 |       "source": [
145 |         "from langchain import LLMChain, FewShotPromptTemplate, PromptTemplate\n",
146 |         "from langchain.chat_models import ChatOpenAI\n",
147 |         "\n",
148 |         "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
149 |         "\n",
150 |         "examples = [\n",
151 |         "    {\"animal\": \"lion\", \"habitat\": \"savanna\"},\n",
152 |         "    {\"animal\": \"polar bear\", \"habitat\": \"Arctic ice\"},\n",
153 |         "    {\"animal\": \"elephant\", \"habitat\": \"African grasslands\"}\n",
154 |         "]\n",
155 |         "\n",
156 |         "example_template = \"\"\"\n",
157 |         "Animal: {animal}\n",
158 |         "Habitat: {habitat}\n",
159 |         "\"\"\"\n",
160 |         "\n",
161 |         "example_prompt = PromptTemplate(\n",
162 |         "    input_variables=[\"animal\", \"habitat\"],\n",
163 |         "    template=example_template\n",
164 |         ")\n",
165 |         "\n",
166 |         "dynamic_prompt = FewShotPromptTemplate(\n",
167 |         "    examples=examples,\n",
168 |         "    example_prompt=example_prompt,\n",
169 |         "    prefix=\"Identify the habitat of the given animal\",\n",
170 |         "    suffix=\"Animal: {input}\\nHabitat:\",\n",
171 |         "    input_variables=[\"input\"],\n",
172 |         "    example_separator=\"\\n\\n\",\n",
173 |         ")\n",
174 |         "\n",
175 |         "# Create the LLMChain for the dynamic_prompt\n",
176 |         "chain = LLMChain(llm=llm, prompt=dynamic_prompt)\n",
177 |         "\n",
178 |         "# Run the LLMChain with input_data\n",
179 |         "input_data = {\"input\": \"tiger\"}\n",
180 |         "response = chain.run(input_data)\n",
181 |         "\n",
182 |         "print(response)"
183 |       ]
184 |     },
185 |     {
186 |       "cell_type": "code",
187 |       "execution_count": null,
188 |       "metadata": {
189 |         "id": "4djEqXz2kuVw"
190 |       },
191 |       "outputs": [],
192 |       "source": [
193 |         "prompt_template.save(\"awesome_prompt.json\")"
194 |       ]
195 |     },
196 |     {
197 |       "cell_type": "code",
198 |       "execution_count": null,
199 |       "metadata": {
200 |         "id": "CDEcazXml-W_"
201 |       },
202 |       "outputs": [],
203 |       "source": [
204 |         "from langchain.prompts import load_prompt\n",
205 |         "loaded_prompt = load_prompt(\"awesome_prompt.json\")"
206 |       ]
207 |     },
208 |     {
209 |       "cell_type": "code",
210 |       "execution_count": null,
211 |       "metadata": {
212 |         "colab": {
213 |           "base_uri": "https://localhost:8080/"
214 |         },
215 |         "id": "m0ufnnNyl_6q",
216 |         "outputId": "0234aff7-a038-49bf-a58e-1cb1a66ebbf9"
217 |       },
218 |       "outputs": [
219 |         {
220 |           "name": "stdout",
221 |           "output_type": "stream",
222 |           "text": [
223 |             " Start by studying Schrödinger's cat. That should get you off to a good start.\n"
224 |           ]
225 |         }
226 |       ],
227 |       "source": [
228 |         "from langchain import LLMChain, FewShotPromptTemplate, PromptTemplate\n",
229 |         "from langchain.chat_models import ChatOpenAI\n",
230 |         "\n",
231 |         "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
232 |         "\n",
233 |         "examples = [\n",
234 |         "    {\n",
235 |         "        \"query\": \"How do I become a better programmer?\",\n",
236 |         "        \"answer\": \"Try talking to a rubber duck; it works wonders.\"\n",
237 |         "    }, {\n",
238 |         "        \"query\": \"Why is the sky blue?\",\n",
239 |         "        \"answer\": \"It's nature's way of preventing eye strain.\"\n",
240 |         "    }\n",
241 |         "]\n",
242 |         "\n",
243 |         "example_template = \"\"\"\n",
244 |         "User: {query}\n",
245 |         "AI: {answer}\n",
246 |         "\"\"\"\n",
247 |         "\n",
248 |         "example_prompt = PromptTemplate(\n",
249 |         "    input_variables=[\"query\", \"answer\"],\n",
250 |         "    template=example_template\n",
251 |         ")\n",
252 |         "\n",
253 |         "prefix = \"\"\"The following are excerpts from conversations with an AI\n",
254 |         "assistant. The assistant is typically sarcastic and witty, producing\n",
255 |         "creative and funny responses to users' questions. Here are some\n",
256 |         "examples:\n",
257 |         "\"\"\"\n",
258 |         "\n",
259 |         "suffix = \"\"\"\n",
260 |         "User: {query}\n",
261 |         "AI: \"\"\"\n",
262 |         "\n",
263 |         "few_shot_prompt_template = FewShotPromptTemplate(\n",
264 |         "    examples=examples,\n",
265 |         "    example_prompt=example_prompt,\n",
266 |         "    prefix=prefix,\n",
267 |         "    suffix=suffix,\n",
268 |         "    input_variables=[\"query\"],\n",
269 |         "    example_separator=\"\\n\\n\"\n",
270 |         ")\n",
271 |         "\n",
272 |         "# Create the LLMChain for the few_shot_prompt_template\n",
273 |         "chain = LLMChain(llm=llm, prompt=few_shot_prompt_template)\n",
274 |         "\n",
275 |         "# Run the LLMChain with input_data\n",
276 |         "input_data = {\"query\": \"How can I learn quantum computing?\"}\n",
277 |         "response = chain.run(input_data)\n",
278 |         "\n",
279 |         "print(response)"
280 |       ]
281 |     },
282 |     {
283 |       "cell_type": "code",
284 |       "execution_count": null,
285 |       "metadata": {
286 |         "id": "b0qYx377mGlR"
287 |       },
288 |       "outputs": [],
289 |       "source": [
290 |         "examples = [\n",
291 |         "    {\n",
292 |         "        \"query\": \"How do you feel today?\",\n",
293 |         "        \"answer\": \"As an AI, I don't have feelings, but I've got jokes!\"\n",
294 |         "    }, {\n",
295 |         "        \"query\": \"What is the speed of light?\",\n",
296 |         "        \"answer\": \"Fast enough to make a round trip around Earth 7.5 times in one second!\"\n",
297 |         "    }, {\n",
298 |         "        \"query\": \"What is a quantum computer?\",\n",
299 |         "        \"answer\": \"A magical box that harnesses the power of subatomic particles to solve complex problems.\"\n",
300 |         "    }, {\n",
301 |         "        \"query\": \"Who invented the telephone?\",\n",
302 |         "        \"answer\": \"Alexander Graham Bell, the original 'ringmaster'.\"\n",
303 |         "    }, {\n",
304 |         "        \"query\": \"What programming language is best for AI development?\",\n",
305 |         "        \"answer\": \"Python, because it's the only snake that won't bite.\"\n",
306 |         "    }, {\n",
307 |         "        \"query\": \"What is the capital of France?\",\n",
308 |         "        \"answer\": \"Paris, the city of love and baguettes.\"\n",
309 |         "    }, {\n",
310 |         "        \"query\": \"What is photosynthesis?\",\n",
311 |         "        \"answer\": \"A plant's way of saying 'I'll turn this sunlight into food. You're welcome, Earth.'\"\n",
312 |         "    }, {\n",
313 |         "        \"query\": \"What is the tallest mountain on Earth?\",\n",
314 |         "        \"answer\": \"Mount Everest, Earth's most impressive bump.\"\n",
315 |         "    }, {\n",
316 |         "        \"query\": \"What is the most abundant element in the universe?\",\n",
317 |         "        \"answer\": \"Hydrogen, the basic building block of cosmic smoothies.\"\n",
318 |         "    }, {\n",
319 |         "        \"query\": \"What is the largest mammal on Earth?\",\n",
320 |         "        \"answer\": \"The blue whale, the original heavyweight champion of the world.\"\n",
321 |         "    }, {\n",
322 |         "        \"query\": \"What is the fastest land animal?\",\n",
323 |         "        \"answer\": \"The cheetah, the ultimate sprinter of the animal kingdom.\"\n",
324 |         "    }, {\n",
325 |         "        \"query\": \"What is the square root of 144?\",\n",
326 |         "        \"answer\": \"12, the number of eggs you need for a really big omelette.\"\n",
327 |         "    }, {\n",
328 |         "        \"query\": \"What is the average temperature on Mars?\",\n",
329 |         "        \"answer\": \"Cold enough to make a Martian wish for a sweater and a hot cocoa.\"\n",
330 |         "    }\n",
331 |         "]"
332 |       ]
333 |     },
334 |     {
335 |       "cell_type": "code",
336 |       "execution_count": null,
337 |       "metadata": {
338 |         "id": "kdui3d7Amxgr"
339 |       },
340 |       "outputs": [],
341 |       "source": [
342 |         "from langchain.prompts.example_selector import LengthBasedExampleSelector\n",
343 |         "\n",
344 |         "example_selector = LengthBasedExampleSelector(\n",
345 |         "    examples=examples,\n",
346 |         "    example_prompt=example_prompt,\n",
347 |         "    max_length=100\n",
348 |         ")"
349 |       ]
350 |     },
351 |     {
352 |       "cell_type": "code",
353 |       "execution_count": null,
354 |       "metadata": {
355 |         "id": "lyiUSvXHmye2"
356 |       },
357 |       "outputs": [],
358 |       "source": [
359 |         "dynamic_prompt_template = FewShotPromptTemplate(\n",
360 |         "    example_selector=example_selector,\n",
361 |         "    example_prompt=example_prompt,\n",
362 |         "    prefix=prefix,\n",
363 |         "    suffix=suffix,\n",
364 |         "    input_variables=[\"query\"],\n",
365 |         "    example_separator=\"\\n\"\n",
366 |         ")"
367 |       ]
368 |     },
369 |     {
370 |       "cell_type": "code",
371 |       "execution_count": null,
372 |       "metadata": {
373 |         "colab": {
374 |           "base_uri": "https://localhost:8080/"
375 |         },
376 |         "id": "BT34c4BkmzqD",
377 |         "outputId": "3f17f8d8-9249-47b5-a193-f8b9ec9315c0"
378 |       },
379 |       "outputs": [
380 |         {
381 |           "name": "stdout",
382 |           "output_type": "stream",
383 |           "text": [
384 |             " Alexander Graham Bell, the man who made it possible to talk to people from miles away!\n"
385 |           ]
386 |         }
387 |       ],
388 |       "source": [
389 |         "from langchain import LLMChain, FewShotPromptTemplate, PromptTemplate\n",
390 |         "from langchain.chat_models import ChatOpenAI\n",
391 |         "\n",
392 |         "from langchain.prompts.example_selector import LengthBasedExampleSelector\n",
393 |         "\n",
394 |         "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
395 |         "\n",
396 |         "# Existing example and prompt definitions, and dynamic_prompt_template initialization\n",
397 |         "\n",
398 |         "# Create the LLMChain for the dynamic_prompt_template\n",
399 |         "chain = LLMChain(llm=llm, prompt=dynamic_prompt_template)\n",
400 |         "\n",
401 |         "# Run the LLMChain with input_data\n",
402 |         "input_data = {\"query\": \"Who invented the telephone?\"}\n",
403 |         "response = chain.run(input_data)\n",
404 |         "\n",
405 |         "print(response)"
406 |       ]
407 |     },
408 |     {
409 |       "cell_type": "code",
410 |       "execution_count": null,
411 |       "metadata": {
412 |         "id": "Ut0T3AsBm4lr"
413 |       },
414 |       "outputs": [],
415 |       "source": []
416 |     }
417 |   ],
418 |   "metadata": {
419 |     "colab": {
420 |       "authorship_tag": "ABX9TyMnfJSkoElZe73GO2aMw9nL",
421 |       "include_colab_link": true,
422 |       "provenance": []
423 |     },
424 |     "kernelspec": {
425 |       "display_name": "Python 3",
426 |       "name": "python3"
427 |     },
428 |     "language_info": {
429 |       "name": "python"
430 |     }
431 |   },
432 |   "nbformat": 4,
433 |   "nbformat_minor": 0
434 | }
435 | 


--------------------------------------------------------------------------------
/notebooks/Chapter 07 - Chains_and_Why_They_Are_Used.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "colab_type": "text",
  7 |         "id": "view-in-github"
  8 |       },
  9 |       "source": [
 10 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2007%20-%20Chains_and_Why_They_Are_Used.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 11 |       ]
 12 |     },
 13 |     {
 14 |       "cell_type": "code",
 15 |       "execution_count": null,
 16 |       "metadata": {
 17 |         "colab": {
 18 |           "base_uri": "https://localhost:8080/"
 19 |         },
 20 |         "id": "mZgUJsmpUCUi",
 21 |         "outputId": "7c27b0d4-5fad-4618-e063-68e87de1e81d"
 22 |       },
 23 |       "outputs": [
 24 |         {
 25 |           "name": "stdout",
 26 |           "output_type": "stream",
 27 |           "text": [
 28 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m823.7/823.7 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 29 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 30 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 31 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.0/90.0 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 32 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 33 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 34 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.6/149.6 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 35 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.1/49.1 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 36 |             "\u001b[?25h"
 37 |           ]
 38 |         }
 39 |       ],
 40 |       "source": [
 41 |         "!pip install -q langchain==0.0.208 openai python-dotenv"
 42 |       ]
 43 |     },
 44 |     {
 45 |       "cell_type": "code",
 46 |       "execution_count": null,
 47 |       "metadata": {
 48 |         "id": "ucL9y4VoUJui"
 49 |       },
 50 |       "outputs": [],
 51 |       "source": [
 52 |         "!echo \"OPENAI_API_KEY='<API_KEY>'\" > .env"
 53 |       ]
 54 |     },
 55 |     {
 56 |       "cell_type": "code",
 57 |       "execution_count": null,
 58 |       "metadata": {
 59 |         "colab": {
 60 |           "base_uri": "https://localhost:8080/"
 61 |         },
 62 |         "id": "hAsSxpAyUOnF",
 63 |         "outputId": "89d0a63f-9c88-4a8f-958a-e97c7451a9f0"
 64 |       },
 65 |       "outputs": [
 66 |         {
 67 |           "data": {
 68 |             "text/plain": [
 69 |               "True"
 70 |             ]
 71 |           },
 72 |           "execution_count": 3,
 73 |           "metadata": {},
 74 |           "output_type": "execute_result"
 75 |         }
 76 |       ],
 77 |       "source": [
 78 |         "from dotenv import load_dotenv\n",
 79 |         "\n",
 80 |         "load_dotenv()"
 81 |       ]
 82 |     },
 83 |     {
 84 |       "cell_type": "markdown",
 85 |       "metadata": {
 86 |         "id": "N-J9LsngZsfp"
 87 |       },
 88 |       "source": [
 89 |         "# Calling"
 90 |       ]
 91 |     },
 92 |     {
 93 |       "cell_type": "markdown",
 94 |       "metadata": {
 95 |         "id": "44NeYe0GXe32"
 96 |       },
 97 |       "source": [
 98 |         "## __ call __"
 99 |       ]
100 |     },
101 |     {
102 |       "cell_type": "code",
103 |       "execution_count": null,
104 |       "metadata": {
105 |         "colab": {
106 |           "base_uri": "https://localhost:8080/"
107 |         },
108 |         "id": "oQn4558HUPvI",
109 |         "outputId": "35edbf54-a44e-41d7-806f-2d2e0f374970"
110 |       },
111 |       "outputs": [
112 |         {
113 |           "data": {
114 |             "text/plain": [
115 |               "{'word': 'artificial', 'text': '\\n\\nSynthetic'}"
116 |             ]
117 |           },
118 |           "execution_count": 7,
119 |           "metadata": {},
120 |           "output_type": "execute_result"
121 |         }
122 |       ],
123 |       "source": [
124 |         "from langchain import PromptTemplate, OpenAI, LLMChain\n",
125 |         "\n",
126 |         "prompt_template = \"What is a word to replace the following: {word}?\"\n",
127 |         "\n",
128 |         "# Set the \"OPENAI_API_KEY\" environment variable before running following line.\n",
129 |         "llm = OpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
130 |         "\n",
131 |         "llm_chain = LLMChain(\n",
132 |         "    llm=llm,\n",
133 |         "    prompt=PromptTemplate.from_template(prompt_template)\n",
134 |         ")\n",
135 |         "llm_chain(\"artificial\")"
136 |       ]
137 |     },
138 |     {
139 |       "cell_type": "markdown",
140 |       "metadata": {
141 |         "id": "3lGTqvJxXjMZ"
142 |       },
143 |       "source": [
144 |         "## Apply"
145 |       ]
146 |     },
147 |     {
148 |       "cell_type": "code",
149 |       "execution_count": null,
150 |       "metadata": {
151 |         "colab": {
152 |           "base_uri": "https://localhost:8080/"
153 |         },
154 |         "id": "GhrI8CggVtJo",
155 |         "outputId": "f2673059-65b2-4ba4-df2c-063688277c88"
156 |       },
157 |       "outputs": [
158 |         {
159 |           "data": {
160 |             "text/plain": [
161 |               "[{'text': '\\n\\nSynthetic'}, {'text': '\\n\\nWisdom'}, {'text': '\\n\\nAutomaton'}]"
162 |             ]
163 |           },
164 |           "execution_count": 6,
165 |           "metadata": {},
166 |           "output_type": "execute_result"
167 |         }
168 |       ],
169 |       "source": [
170 |         "input_list = [\n",
171 |         "    {\"word\": \"artificial\"},\n",
172 |         "    {\"word\": \"intelligence\"},\n",
173 |         "    {\"word\": \"robot\"}\n",
174 |         "]\n",
175 |         "\n",
176 |         "llm_chain.apply(input_list)"
177 |       ]
178 |     },
179 |     {
180 |       "cell_type": "markdown",
181 |       "metadata": {
182 |         "id": "s5jB5LBJXk9s"
183 |       },
184 |       "source": [
185 |         "## Generate"
186 |       ]
187 |     },
188 |     {
189 |       "cell_type": "code",
190 |       "execution_count": null,
191 |       "metadata": {
192 |         "colab": {
193 |           "base_uri": "https://localhost:8080/"
194 |         },
195 |         "id": "FYi0o5KqV68n",
196 |         "outputId": "e656827f-2299-423e-f4a8-af09a1402fae"
197 |       },
198 |       "outputs": [
199 |         {
200 |           "data": {
201 |             "text/plain": [
202 |               "LLMResult(generations=[[Generation(text='\\n\\nSynthetic', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\\n\\nWisdom', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\\n\\nAutomaton', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {'prompt_tokens': 33, 'completion_tokens': 13, 'total_tokens': 46}, 'model_name': 'text-davinci-003'})"
203 |             ]
204 |           },
205 |           "execution_count": 7,
206 |           "metadata": {},
207 |           "output_type": "execute_result"
208 |         }
209 |       ],
210 |       "source": [
211 |         "llm_chain.generate(input_list)"
212 |       ]
213 |     },
214 |     {
215 |       "cell_type": "markdown",
216 |       "metadata": {
217 |         "id": "Ks4ej9ZXXm-E"
218 |       },
219 |       "source": [
220 |         "## Predict"
221 |       ]
222 |     },
223 |     {
224 |       "cell_type": "markdown",
225 |       "metadata": {
226 |         "id": "8q1BvtRiXuLg"
227 |       },
228 |       "source": [
229 |         "#### Multiple"
230 |       ]
231 |     },
232 |     {
233 |       "cell_type": "code",
234 |       "execution_count": null,
235 |       "metadata": {
236 |         "colab": {
237 |           "base_uri": "https://localhost:8080/",
238 |           "height": 35
239 |         },
240 |         "id": "OX4RrRrlXvWm",
241 |         "outputId": "c9cc9d8a-1585-4f6d-ca5f-e752c04c1fd2"
242 |       },
243 |       "outputs": [
244 |         {
245 |           "data": {
246 |             "application/vnd.google.colaboratory.intrinsic+json": {
247 |               "type": "string"
248 |             },
249 |             "text/plain": [
250 |               "'\\n\\nVentilator'"
251 |             ]
252 |           },
253 |           "execution_count": 63,
254 |           "metadata": {},
255 |           "output_type": "execute_result"
256 |         }
257 |       ],
258 |       "source": [
259 |         "prompt_template = \"Looking at the context of '{context}'. What is a approapriate word to replace the following: {word}?\"\n",
260 |         "\n",
261 |         "llm_chain = LLMChain(\n",
262 |         "    llm=llm,\n",
263 |         "    prompt=PromptTemplate(template=prompt_template, input_variables=[\"word\", \"context\"]))\n",
264 |         "\n",
265 |         "llm_chain.predict(word=\"fan\", context=\"object\")"
266 |       ]
267 |     },
268 |     {
269 |       "cell_type": "code",
270 |       "execution_count": null,
271 |       "metadata": {
272 |         "colab": {
273 |           "base_uri": "https://localhost:8080/",
274 |           "height": 35
275 |         },
276 |         "id": "NpdjaLWwYQQ1",
277 |         "outputId": "5ebe6455-95a2-4167-e235-d49982f9fe0b"
278 |       },
279 |       "outputs": [
280 |         {
281 |           "data": {
282 |             "application/vnd.google.colaboratory.intrinsic+json": {
283 |               "type": "string"
284 |             },
285 |             "text/plain": [
286 |               "'\\n\\nAdmirer'"
287 |             ]
288 |           },
289 |           "execution_count": 19,
290 |           "metadata": {},
291 |           "output_type": "execute_result"
292 |         }
293 |       ],
294 |       "source": [
295 |         "llm_chain.predict(word=\"fan\", context=\"humans\")"
296 |       ]
297 |     },
298 |     {
299 |       "cell_type": "code",
300 |       "execution_count": null,
301 |       "metadata": {
302 |         "colab": {
303 |           "base_uri": "https://localhost:8080/",
304 |           "height": 35
305 |         },
306 |         "id": "9P5gGwxClJeL",
307 |         "outputId": "560ea2b2-b540-42a8-c18a-19fcca6f0e70"
308 |       },
309 |       "outputs": [
310 |         {
311 |           "data": {
312 |             "application/vnd.google.colaboratory.intrinsic+json": {
313 |               "type": "string"
314 |             },
315 |             "text/plain": [
316 |               "'\\n\\nVentilator'"
317 |             ]
318 |           },
319 |           "execution_count": 64,
320 |           "metadata": {},
321 |           "output_type": "execute_result"
322 |         }
323 |       ],
324 |       "source": [
325 |         "# llm_chain.run(word=\"fan\", context=\"object\")"
326 |       ]
327 |     },
328 |     {
329 |       "cell_type": "markdown",
330 |       "metadata": {
331 |         "id": "UNPOT6iAbt1l"
332 |       },
333 |       "source": [
334 |         "### from string"
335 |       ]
336 |     },
337 |     {
338 |       "cell_type": "code",
339 |       "execution_count": null,
340 |       "metadata": {
341 |         "id": "h6T5_9k2bx_N"
342 |       },
343 |       "outputs": [],
344 |       "source": [
345 |         "template = \"\"\"Looking at the context of '{context}'. What is a approapriate word to replace the following: {word}?\"\"\"\n",
346 |         "llm_chain = LLMChain.from_string(llm=llm, template=template)"
347 |       ]
348 |     },
349 |     {
350 |       "cell_type": "code",
351 |       "execution_count": null,
352 |       "metadata": {
353 |         "colab": {
354 |           "base_uri": "https://localhost:8080/",
355 |           "height": 35
356 |         },
357 |         "id": "AkE6wx8Vb9Ns",
358 |         "outputId": "dbedb888-49d7-43da-88a5-05472fbea85d"
359 |       },
360 |       "outputs": [
361 |         {
362 |           "data": {
363 |             "application/vnd.google.colaboratory.intrinsic+json": {
364 |               "type": "string"
365 |             },
366 |             "text/plain": [
367 |               "'\\n\\nVentilator'"
368 |             ]
369 |           },
370 |           "execution_count": 35,
371 |           "metadata": {},
372 |           "output_type": "execute_result"
373 |         }
374 |       ],
375 |       "source": [
376 |         "llm_chain.predict(word=\"fan\", context=\"object\")"
377 |       ]
378 |     },
379 |     {
380 |       "cell_type": "markdown",
381 |       "metadata": {
382 |         "id": "TRIaIXSKZu6U"
383 |       },
384 |       "source": [
385 |         "# Parsers"
386 |       ]
387 |     },
388 |     {
389 |       "cell_type": "code",
390 |       "execution_count": null,
391 |       "metadata": {
392 |         "colab": {
393 |           "base_uri": "https://localhost:8080/",
394 |           "height": 53
395 |         },
396 |         "id": "aIEZWDQtZwKw",
397 |         "outputId": "aa45ef77-7ea6-42ea-c613-94ac0accfb88"
398 |       },
399 |       "outputs": [
400 |         {
401 |           "data": {
402 |             "application/vnd.google.colaboratory.intrinsic+json": {
403 |               "type": "string"
404 |             },
405 |             "text/plain": [
406 |               "'\\n\\nSynthetic, Manufactured, Imitation, Fabricated, Fake, Simulated, Artificial Intelligence, Automated, Constructed, Programmed, Mechanical, Processed, Algorithmic, Generated.'"
407 |             ]
408 |           },
409 |           "execution_count": 36,
410 |           "metadata": {},
411 |           "output_type": "execute_result"
412 |         }
413 |       ],
414 |       "source": [
415 |         "from langchain.output_parsers import CommaSeparatedListOutputParser\n",
416 |         "\n",
417 |         "output_parser = CommaSeparatedListOutputParser()\n",
418 |         "template = \"\"\"List all possible words as substitute for 'artificial' as comma separated.\"\"\"\n",
419 |         "\n",
420 |         "llm_chain = LLMChain(\n",
421 |         "    llm=llm,\n",
422 |         "    prompt=PromptTemplate(template=template, input_variables=[], output_parser=output_parser))\n",
423 |         "\n",
424 |         "llm_chain.predict()"
425 |       ]
426 |     },
427 |     {
428 |       "cell_type": "code",
429 |       "execution_count": null,
430 |       "metadata": {
431 |         "colab": {
432 |           "base_uri": "https://localhost:8080/"
433 |         },
434 |         "id": "mJ18G38-aXcE",
435 |         "outputId": "248f1533-6948-4f94-be8d-1ceb88433c20"
436 |       },
437 |       "outputs": [
438 |         {
439 |           "data": {
440 |             "text/plain": [
441 |               "['Synthetic',\n",
442 |               " 'Manufactured',\n",
443 |               " 'Imitation',\n",
444 |               " 'Fabricated',\n",
445 |               " 'Fake',\n",
446 |               " 'Simulated',\n",
447 |               " 'Artificial Intelligence',\n",
448 |               " 'Automated',\n",
449 |               " 'Constructed',\n",
450 |               " 'Programmed',\n",
451 |               " 'Processed',\n",
452 |               " 'Mechanical',\n",
453 |               " 'Man-Made',\n",
454 |               " 'Lab-Created',\n",
455 |               " 'Artificial Neural Network.']"
456 |             ]
457 |           },
458 |           "execution_count": 37,
459 |           "metadata": {},
460 |           "output_type": "execute_result"
461 |         }
462 |       ],
463 |       "source": [
464 |         "llm_chain.predict_and_parse()"
465 |       ]
466 |     },
467 |     {
468 |       "cell_type": "markdown",
469 |       "metadata": {
470 |         "id": "2b8_oGlAcx3F"
471 |       },
472 |       "source": [
473 |         "# Conversational Chain"
474 |       ]
475 |     },
476 |     {
477 |       "cell_type": "code",
478 |       "execution_count": null,
479 |       "metadata": {
480 |         "colab": {
481 |           "base_uri": "https://localhost:8080/"
482 |         },
483 |         "id": "OTYpcUktae6w",
484 |         "outputId": "3c7c83dd-5ba6-44f6-bbe7-baf153c97232"
485 |       },
486 |       "outputs": [
487 |         {
488 |           "data": {
489 |             "text/plain": [
490 |               "['Synthetic', 'Manufactured', 'Imitation']"
491 |             ]
492 |           },
493 |           "execution_count": 57,
494 |           "metadata": {},
495 |           "output_type": "execute_result"
496 |         }
497 |       ],
498 |       "source": [
499 |         "from langchain.chains import ConversationChain\n",
500 |         "from langchain.memory import ConversationBufferMemory\n",
501 |         "\n",
502 |         "template = \"\"\"List all possible words as substitute for 'artificial' as comma separated.\n",
503 |         "\n",
504 |         "Current conversation:\n",
505 |         "{history}\n",
506 |         "\n",
507 |         "{input}\"\"\"\n",
508 |         "\n",
509 |         "conversation = ConversationChain(\n",
510 |         "    llm=llm,\n",
511 |         "    prompt=PromptTemplate(template=template, input_variables=[\"history\", \"input\"], output_parser=output_parser),\n",
512 |         "    memory=ConversationBufferMemory())\n",
513 |         "\n",
514 |         "conversation.predict_and_parse(input=\"Answer briefly. write the first 3 options.\")"
515 |       ]
516 |     },
517 |     {
518 |       "cell_type": "code",
519 |       "execution_count": null,
520 |       "metadata": {
521 |         "colab": {
522 |           "base_uri": "https://localhost:8080/"
523 |         },
524 |         "id": "q7_vgaBgeWWG",
525 |         "outputId": "0e9c50d3-a48f-4083-bafe-4944a53dda26"
526 |       },
527 |       "outputs": [
528 |         {
529 |           "data": {
530 |             "text/plain": [
531 |               "['Fabricated', 'Simulated', 'Automated', 'Constructed']"
532 |             ]
533 |           },
534 |           "execution_count": 58,
535 |           "metadata": {},
536 |           "output_type": "execute_result"
537 |         }
538 |       ],
539 |       "source": [
540 |         "conversation.predict_and_parse(input=\"And the next 4?\")"
541 |       ]
542 |     },
543 |     {
544 |       "cell_type": "markdown",
545 |       "metadata": {
546 |         "id": "0Kz12V6FhjC3"
547 |       },
548 |       "source": [
549 |         "# Debug"
550 |       ]
551 |     },
552 |     {
553 |       "cell_type": "code",
554 |       "execution_count": null,
555 |       "metadata": {
556 |         "colab": {
557 |           "base_uri": "https://localhost:8080/"
558 |         },
559 |         "id": "zGRoPJ1xhtTE",
560 |         "outputId": "4bfd9840-47a9-4967-8c9d-e6a9ff0fbaa5"
561 |       },
562 |       "outputs": [
563 |         {
564 |           "name": "stdout",
565 |           "output_type": "stream",
566 |           "text": [
567 |             "\n",
568 |             "\n",
569 |             "\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n",
570 |             "Prompt after formatting:\n",
571 |             "\u001b[32;1m\u001b[1;3mList all possible words as substitute for 'artificial' as comma separated.\n",
572 |             "\n",
573 |             "Current conversation:\n",
574 |             "\n",
575 |             "\n",
576 |             "Answer briefly. write the first 3 options.\u001b[0m\n",
577 |             "\n",
578 |             "\u001b[1m> Finished chain.\u001b[0m\n"
579 |           ]
580 |         },
581 |         {
582 |           "data": {
583 |             "text/plain": [
584 |               "['Synthetic', 'Manufactured', 'Imitation']"
585 |             ]
586 |           },
587 |           "execution_count": 59,
588 |           "metadata": {},
589 |           "output_type": "execute_result"
590 |         }
591 |       ],
592 |       "source": [
593 |         "conversation = ConversationChain(\n",
594 |         "    llm=llm,\n",
595 |         "    prompt=PromptTemplate(template=template, input_variables=[\"history\", \"input\"], output_parser=output_parser),\n",
596 |         "    memory=ConversationBufferMemory(),\n",
597 |         "    verbose=True)\n",
598 |         "\n",
599 |         "conversation.predict_and_parse(input=\"Answer briefly. write the first 3 options.\")"
600 |       ]
601 |     },
602 |     {
603 |       "cell_type": "markdown",
604 |       "metadata": {
605 |         "id": "8XI9e40ui1yX"
606 |       },
607 |       "source": [
608 |         "# Sequential Chain"
609 |       ]
610 |     },
611 |     {
612 |       "cell_type": "code",
613 |       "execution_count": null,
614 |       "metadata": {
615 |         "id": "A16wajt2hxLE"
616 |       },
617 |       "outputs": [],
618 |       "source": [
619 |         "# from langchain.chains import SimpleSequentialChain\n",
620 |         "# overall_chain = SimpleSequentialChain(chains=[chain_one, chain_two], verbose=True)"
621 |       ]
622 |     },
623 |     {
624 |       "cell_type": "markdown",
625 |       "metadata": {
626 |         "id": "fs4Chc0iKaj3"
627 |       },
628 |       "source": [
629 |         "# Custom Chain"
630 |       ]
631 |     },
632 |     {
633 |       "cell_type": "code",
634 |       "execution_count": null,
635 |       "metadata": {
636 |         "id": "3tCjI4DtKbTG"
637 |       },
638 |       "outputs": [],
639 |       "source": [
640 |         "from langchain.chains import LLMChain\n",
641 |         "from langchain.chains.base import Chain\n",
642 |         "\n",
643 |         "from typing import Dict, List\n",
644 |         "\n",
645 |         "\n",
646 |         "class ConcatenateChain(Chain):\n",
647 |         "    chain_1: LLMChain\n",
648 |         "    chain_2: LLMChain\n",
649 |         "\n",
650 |         "    @property\n",
651 |         "    def input_keys(self) -> List[str]:\n",
652 |         "        # Union of the input keys of the two chains.\n",
653 |         "        all_input_vars = set(self.chain_1.input_keys).union(set(self.chain_2.input_keys))\n",
654 |         "        return list(all_input_vars)\n",
655 |         "\n",
656 |         "    @property\n",
657 |         "    def output_keys(self) -> List[str]:\n",
658 |         "        return ['concat_output']\n",
659 |         "\n",
660 |         "    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:\n",
661 |         "        output_1 = self.chain_1.run(inputs)\n",
662 |         "        output_2 = self.chain_2.run(inputs)\n",
663 |         "        return {'concat_output': output_1 + output_2}"
664 |       ]
665 |     },
666 |     {
667 |       "cell_type": "code",
668 |       "execution_count": null,
669 |       "metadata": {
670 |         "colab": {
671 |           "base_uri": "https://localhost:8080/"
672 |         },
673 |         "id": "h-W3ZqALLbwP",
674 |         "outputId": "83f65c01-5573-403f-9180-7a2b60a41b57"
675 |       },
676 |       "outputs": [
677 |         {
678 |           "name": "stdout",
679 |           "output_type": "stream",
680 |           "text": [
681 |             "Concatenated output:\n",
682 |             "\n",
683 |             "\n",
684 |             "Artificial means something that is not natural or made by humans, but rather created or produced by artificial means.\n",
685 |             "\n",
686 |             "Synthetic\n"
687 |           ]
688 |         }
689 |       ],
690 |       "source": [
691 |         "prompt_1 = PromptTemplate(\n",
692 |         "    input_variables=[\"word\"],\n",
693 |         "    template=\"What is the meaning of the following word '{word}'?\",\n",
694 |         ")\n",
695 |         "chain_1 = LLMChain(llm=llm, prompt=prompt_1)\n",
696 |         "\n",
697 |         "prompt_2 = PromptTemplate(\n",
698 |         "    input_variables=[\"word\"],\n",
699 |         "    template=\"What is a word to replace the following: {word}?\",\n",
700 |         ")\n",
701 |         "chain_2 = LLMChain(llm=llm, prompt=prompt_2)\n",
702 |         "\n",
703 |         "concat_chain = ConcatenateChain(chain_1=chain_1, chain_2=chain_2)\n",
704 |         "concat_output = concat_chain.run(\"artificial\")\n",
705 |         "print(f\"Concatenated output:\\n{concat_output}\")"
706 |       ]
707 |     }
708 |   ],
709 |   "metadata": {
710 |     "colab": {
711 |       "authorship_tag": "ABX9TyPESOSrhtfDiEeFVbO8r7kg",
712 |       "include_colab_link": true,
713 |       "provenance": []
714 |     },
715 |     "kernelspec": {
716 |       "display_name": "Python 3",
717 |       "name": "python3"
718 |     },
719 |     "language_info": {
720 |       "name": "python"
721 |     }
722 |   },
723 |   "nbformat": 4,
724 |   "nbformat_minor": 0
725 | }
726 | 


--------------------------------------------------------------------------------
/notebooks/Chapter 08 - Mastering_Advanced_RAG.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyMXzjOfaH6SdmjQLFXDRt3D",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2008%20-%20Mastering_Advanced_RAG.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "execution_count": null,
 32 |       "metadata": {
 33 |         "id": "iSIIb6ey0POS"
 34 |       },
 35 |       "outputs": [],
 36 |       "source": [
 37 |         "!pip install -q llama-index==0.9.14.post3 deeplake==3.8.8 openai==1.3.8 cohere==4.37"
 38 |       ]
 39 |     },
 40 |     {
 41 |       "cell_type": "code",
 42 |       "source": [
 43 |         "import os\n",
 44 |         "\n",
 45 |         "os.environ['OPENAI_API_KEY'] = '<YOUR_OPENAI_API_KEY>'\n",
 46 |         "os.environ['ACTIVELOOP_TOKEN'] = '<YOUR_ACTIVELOOP_API_KEY>'\n",
 47 |         "os.environ['COHERE_API_KEY'] = '<YOUR_COHERE_API_KEY>'"
 48 |       ],
 49 |       "metadata": {
 50 |         "id": "Ul1SFfON0TD1"
 51 |       },
 52 |       "execution_count": null,
 53 |       "outputs": []
 54 |     },
 55 |     {
 56 |       "cell_type": "code",
 57 |       "source": [
 58 |         "!mkdir -p './paul_graham/'\n",
 59 |         "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O './paul_graham/paul_graham_essay.txt'"
 60 |       ],
 61 |       "metadata": {
 62 |         "colab": {
 63 |           "base_uri": "https://localhost:8080/"
 64 |         },
 65 |         "id": "m7rXjNi00bWW",
 66 |         "outputId": "d9e073ef-a76d-4977-c1e9-1afb3199d4f6"
 67 |       },
 68 |       "execution_count": null,
 69 |       "outputs": [
 70 |         {
 71 |           "output_type": "stream",
 72 |           "name": "stdout",
 73 |           "text": [
 74 |             "--2023-12-13 17:18:03--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
 75 |             "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
 76 |             "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
 77 |             "HTTP request sent, awaiting response... 200 OK\n",
 78 |             "Length: 75042 (73K) [text/plain]\n",
 79 |             "Saving to: ‘./paul_graham/paul_graham_essay.txt’\n",
 80 |             "\n",
 81 |             "\r          ./paul_gr   0%[                    ]       0  --.-KB/s               \r./paul_graham/paul_ 100%[===================>]  73.28K  --.-KB/s    in 0.02s   \n",
 82 |             "\n",
 83 |             "2023-12-13 17:18:03 (3.18 MB/s) - ‘./paul_graham/paul_graham_essay.txt’ saved [75042/75042]\n",
 84 |             "\n"
 85 |           ]
 86 |         }
 87 |       ]
 88 |     },
 89 |     {
 90 |       "cell_type": "code",
 91 |       "source": [
 92 |         "from llama_index import SimpleDirectoryReader\n",
 93 |         "\n",
 94 |         "# load documents\n",
 95 |         "documents = SimpleDirectoryReader(\"./paul_graham\").load_data()"
 96 |       ],
 97 |       "metadata": {
 98 |         "id": "C1Q0cMtj0kOs"
 99 |       },
100 |       "execution_count": null,
101 |       "outputs": []
102 |     },
103 |     {
104 |       "cell_type": "code",
105 |       "source": [
106 |         "from llama_index import ServiceContext\n",
107 |         "\n",
108 |         "# initialize service context (set chunk size)\n",
109 |         "service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=64)\n",
110 |         "node_parser = service_context.node_parser\n",
111 |         "\n",
112 |         "nodes = node_parser.get_nodes_from_documents(documents)"
113 |       ],
114 |       "metadata": {
115 |         "id": "h9CPwNm10vj2"
116 |       },
117 |       "execution_count": null,
118 |       "outputs": []
119 |     },
120 |     {
121 |       "cell_type": "code",
122 |       "source": [
123 |         "from llama_index.vector_stores import DeepLakeVectorStore\n",
124 |         "\n",
125 |         "my_activeloop_org_id = \"genai360\"\n",
126 |         "my_activeloop_dataset_name = \"LlamaIndex_paulgraham_essay\"\n",
127 |         "dataset_path = f\"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}\"\n",
128 |         "\n",
129 |         "# Create an index over the documnts\n",
130 |         "vector_store = DeepLakeVectorStore(dataset_path=dataset_path, overwrite=False)"
131 |       ],
132 |       "metadata": {
133 |         "colab": {
134 |           "base_uri": "https://localhost:8080/"
135 |         },
136 |         "id": "96K3MP511gh7",
137 |         "outputId": "e0d44a22-f87f-4a99-e766-a0675cc2f22d"
138 |       },
139 |       "execution_count": null,
140 |       "outputs": [
141 |         {
142 |           "output_type": "stream",
143 |           "name": "stdout",
144 |           "text": [
145 |             "Your Deep Lake dataset has been successfully created!\n"
146 |           ]
147 |         },
148 |         {
149 |           "output_type": "stream",
150 |           "name": "stderr",
151 |           "text": []
152 |         }
153 |       ]
154 |     },
155 |     {
156 |       "cell_type": "code",
157 |       "source": [
158 |         "from llama_index.storage.storage_context import StorageContext\n",
159 |         "\n",
160 |         "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
161 |         "storage_context.docstore.add_documents(nodes)"
162 |       ],
163 |       "metadata": {
164 |         "id": "FaH-fN_b1PQo"
165 |       },
166 |       "execution_count": null,
167 |       "outputs": []
168 |     },
169 |     {
170 |       "cell_type": "code",
171 |       "source": [
172 |         "from llama_index import VectorStoreIndex\n",
173 |         "\n",
174 |         "vector_index = VectorStoreIndex(nodes, storage_context=storage_context)"
175 |       ],
176 |       "metadata": {
177 |         "colab": {
178 |           "base_uri": "https://localhost:8080/"
179 |         },
180 |         "id": "JBGiu_3j17mX",
181 |         "outputId": "eb0a8684-63c9-4190-8388-9304abc12768"
182 |       },
183 |       "execution_count": null,
184 |       "outputs": [
185 |         {
186 |           "output_type": "stream",
187 |           "name": "stdout",
188 |           "text": [
189 |             "Uploading data to deeplake dataset.\n"
190 |           ]
191 |         },
192 |         {
193 |           "output_type": "stream",
194 |           "name": "stderr",
195 |           "text": [
196 |             "100%|██████████| 40/40 [00:00<00:00, 82.86it/s]\n",
197 |             "\\"
198 |           ]
199 |         },
200 |         {
201 |           "output_type": "stream",
202 |           "name": "stdout",
203 |           "text": [
204 |             "Dataset(path='hub://genai360/LlamaIndex_paulgraham_essay', tensors=['text', 'metadata', 'embedding', 'id'])\n",
205 |             "\n",
206 |             "  tensor      htype      shape      dtype  compression\n",
207 |             "  -------    -------    -------    -------  ------- \n",
208 |             "   text       text      (40, 1)      str     None   \n",
209 |             " metadata     json      (40, 1)      str     None   \n",
210 |             " embedding  embedding  (40, 1536)  float32   None   \n",
211 |             "    id        text      (40, 1)      str     None   \n"
212 |           ]
213 |         },
214 |         {
215 |           "output_type": "stream",
216 |           "name": "stderr",
217 |           "text": [
218 |             "\r \r"
219 |           ]
220 |         }
221 |       ]
222 |     },
223 |     {
224 |       "cell_type": "code",
225 |       "source": [
226 |         "query_engine = vector_index.as_query_engine(streaming=True, similarity_top_k=10)"
227 |       ],
228 |       "metadata": {
229 |         "id": "uOoF3OYa2a-q"
230 |       },
231 |       "execution_count": null,
232 |       "outputs": []
233 |     },
234 |     {
235 |       "cell_type": "code",
236 |       "source": [
237 |         "streaming_response = query_engine.query(\n",
238 |         "    \"What does Paul Graham do?\",\n",
239 |         ")\n",
240 |         "streaming_response.print_response_stream()"
241 |       ],
242 |       "metadata": {
243 |         "colab": {
244 |           "base_uri": "https://localhost:8080/"
245 |         },
246 |         "id": "QpbkU6GR2mUC",
247 |         "outputId": "515e4b9f-9508-41c1-ff75-52a9b6d9c0c2"
248 |       },
249 |       "execution_count": null,
250 |       "outputs": [
251 |         {
252 |           "output_type": "stream",
253 |           "name": "stdout",
254 |           "text": [
255 |             "Paul Graham is involved in various activities. He has worked on developing software, including a programming language called Bel. He has also written essays on various topics and has been involved in the startup world. Additionally, he is one of the founders of Y Combinator, a startup accelerator program that funds and supports startups."
256 |           ]
257 |         }
258 |       ]
259 |     },
260 |     {
261 |       "cell_type": "markdown",
262 |       "source": [
263 |         "# SubQuestion Query Engine"
264 |       ],
265 |       "metadata": {
266 |         "id": "9l7VRFwBL3oS"
267 |       }
268 |     },
269 |     {
270 |       "cell_type": "code",
271 |       "source": [
272 |         "query_engine = vector_index.as_query_engine(similarity_top_k=10)"
273 |       ],
274 |       "metadata": {
275 |         "id": "semCwp2XMmXq"
276 |       },
277 |       "execution_count": null,
278 |       "outputs": []
279 |     },
280 |     {
281 |       "cell_type": "code",
282 |       "source": [
283 |         "from llama_index.tools import QueryEngineTool, ToolMetadata\n",
284 |         "from llama_index.query_engine import SubQuestionQueryEngine\n",
285 |         "\n",
286 |         "query_engine_tools = [\n",
287 |         "    QueryEngineTool(\n",
288 |         "        query_engine=query_engine,\n",
289 |         "        metadata=ToolMetadata(\n",
290 |         "            name=\"pg_essay\",\n",
291 |         "            description=\"Paul Graham essay on What I Worked On\",\n",
292 |         "        ),\n",
293 |         "    ),\n",
294 |         "]\n",
295 |         "\n",
296 |         "query_engine = SubQuestionQueryEngine.from_defaults(\n",
297 |         "    query_engine_tools=query_engine_tools,\n",
298 |         "    service_context=service_context,\n",
299 |         "    use_async=True,\n",
300 |         ")"
301 |       ],
302 |       "metadata": {
303 |         "id": "UYfJFCaGL3B_"
304 |       },
305 |       "execution_count": null,
306 |       "outputs": []
307 |     },
308 |     {
309 |       "cell_type": "code",
310 |       "source": [
311 |         "response = query_engine.query(\n",
312 |         "    \"How was Paul Grahams life different before, during, and after YC?\"\n",
313 |         ")"
314 |       ],
315 |       "metadata": {
316 |         "colab": {
317 |           "base_uri": "https://localhost:8080/"
318 |         },
319 |         "id": "rDRMYb9HMH7a",
320 |         "outputId": "7e4eadba-7868-4927-b6a3-cc336f99b058"
321 |       },
322 |       "execution_count": null,
323 |       "outputs": [
324 |         {
325 |           "output_type": "stream",
326 |           "name": "stdout",
327 |           "text": [
328 |             "Generated 3 sub questions.\n",
329 |             "\u001b[1;3;38;2;237;90;200m[pg_essay] Q: What did Paul Graham work on before YC?\n",
330 |             "\u001b[0m\u001b[1;3;38;2;90;149;237m[pg_essay] Q: What did Paul Graham work on during YC?\n",
331 |             "\u001b[0m\u001b[1;3;38;2;11;159;203m[pg_essay] Q: What did Paul Graham work on after YC?\n",
332 |             "\u001b[0m\u001b[1;3;38;2;90;149;237m[pg_essay] A: During YC, Paul Graham worked on writing essays and working on YC itself.\n",
333 |             "\u001b[0m\u001b[1;3;38;2;237;90;200m[pg_essay] A: Before YC, Paul Graham worked on a variety of projects. He wrote essays, worked on YC's internal software in Arc, and also worked on a new version of Arc. Additionally, he started Hacker News, which was originally meant to be a news aggregator for startup founders.\n",
334 |             "\u001b[0m\u001b[1;3;38;2;11;159;203m[pg_essay] A: After Y Combinator (YC), Paul Graham worked on various projects. He focused on writing essays and also worked on a programming language called Arc. However, he gradually reduced his work on Arc due to time constraints and the infrastructure dependency on it. Additionally, he engaged in painting for a period of time. Later, he worked on a new version of Arc called Bel, which he worked on intensively and found satisfying. He also continued writing essays and exploring other potential projects.\n",
335 |             "\u001b[0m"
336 |           ]
337 |         }
338 |       ]
339 |     },
340 |     {
341 |       "cell_type": "code",
342 |       "source": [
343 |         "print( \">>> The final response:\\n\", response )"
344 |       ],
345 |       "metadata": {
346 |         "colab": {
347 |           "base_uri": "https://localhost:8080/"
348 |         },
349 |         "id": "pRvnUf7zMLBF",
350 |         "outputId": "4f726217-13b5-4fb6-ee60-741f29358fe7"
351 |       },
352 |       "execution_count": null,
353 |       "outputs": [
354 |         {
355 |           "output_type": "stream",
356 |           "name": "stdout",
357 |           "text": [
358 |             ">>> The final response:\n",
359 |             " Paul Graham's life was different before, during, and after YC. Before YC, he worked on a variety of projects including writing essays, developing YC's internal software in Arc, and creating Hacker News. During YC, his focus shifted to writing essays and working on YC itself. After YC, he continued writing essays but also worked on various projects such as developing the programming language Arc and later its new version called Bel. He also explored other potential projects and engaged in painting for a period of time. Overall, his work and interests evolved throughout these different phases of his life.\n"
360 |           ]
361 |         }
362 |       ]
363 |     },
364 |     {
365 |       "cell_type": "markdown",
366 |       "source": [
367 |         "# Cohere Rerank"
368 |       ],
369 |       "metadata": {
370 |         "id": "t8jeNI3Igwqv"
371 |       }
372 |     },
373 |     {
374 |       "cell_type": "code",
375 |       "source": [
376 |         "import cohere\n",
377 |         "\n",
378 |         "# Get your cohere API key on: www.cohere.com\n",
379 |         "co = cohere.Client(os.environ['COHERE_API_KEY'])\n",
380 |         "\n",
381 |         "# Example query and passages\n",
382 |         "query = \"What is the capital of the United States?\"\n",
383 |         "documents = [\n",
384 |         "   \"Carson City is the capital city of the American state of Nevada. At the  2010 United States Census, Carson City had a population of 55,274.\",\n",
385 |         "   \"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.\",\n",
386 |         "   \"Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.\",\n",
387 |         "   \"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. \",\n",
388 |         "   \"Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.\",\n",
389 |         "   \"North Dakota is a state in the United States. 672,591 people lived in North Dakota in the year 2010. The capital and seat of government is Bismarck.\"\n",
390 |         "   ]"
391 |       ],
392 |       "metadata": {
393 |         "id": "mVJOtIHQgxlf"
394 |       },
395 |       "execution_count": null,
396 |       "outputs": []
397 |     },
398 |     {
399 |       "cell_type": "code",
400 |       "source": [
401 |         "results = co.rerank(query=query, documents=documents, top_n=3, model='rerank-english-v2.0') # Change top_n to change the number of results returned. If top_n is not passed, all results will be returned.\n",
402 |         "\n",
403 |         "for idx, r in enumerate(results):\n",
404 |         "  print(f\"Document Rank: {idx + 1}, Document Index: {r.index}\")\n",
405 |         "  print(f\"Document: {r.document['text']}\")\n",
406 |         "  print(f\"Relevance Score: {r.relevance_score:.2f}\")\n",
407 |         "  print(\"\\n\")"
408 |       ],
409 |       "metadata": {
410 |         "colab": {
411 |           "base_uri": "https://localhost:8080/"
412 |         },
413 |         "id": "O0nOLQmmg3yY",
414 |         "outputId": "d5269adb-2fc3-4a0e-d977-6e0f44ae0fd5"
415 |       },
416 |       "execution_count": null,
417 |       "outputs": [
418 |         {
419 |           "output_type": "stream",
420 |           "name": "stdout",
421 |           "text": [
422 |             "Document Rank: 1, Document Index: 3\n",
423 |             "Document: Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. \n",
424 |             "Relevance Score: 0.98\n",
425 |             "\n",
426 |             "\n",
427 |             "Document Rank: 2, Document Index: 1\n",
428 |             "Document: The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.\n",
429 |             "Relevance Score: 0.30\n",
430 |             "\n",
431 |             "\n",
432 |             "Document Rank: 3, Document Index: 4\n",
433 |             "Document: Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.\n",
434 |             "Relevance Score: 0.28\n",
435 |             "\n",
436 |             "\n"
437 |           ]
438 |         }
439 |       ]
440 |     },
441 |     {
442 |       "cell_type": "markdown",
443 |       "source": [
444 |         "# Cohere in LlamaIndex"
445 |       ],
446 |       "metadata": {
447 |         "id": "bifEKCqihBBy"
448 |       }
449 |     },
450 |     {
451 |       "cell_type": "code",
452 |       "source": [
453 |         "import os\n",
454 |         "from llama_index.postprocessor.cohere_rerank import CohereRerank\n",
455 |         "\n",
456 |         "\n",
457 |         "cohere_rerank = CohereRerank(api_key=os.environ['COHERE_API_KEY'], top_n=2)"
458 |       ],
459 |       "metadata": {
460 |         "id": "AtRoHfgClgqS"
461 |       },
462 |       "execution_count": null,
463 |       "outputs": []
464 |     },
465 |     {
466 |       "cell_type": "code",
467 |       "source": [
468 |         "query_engine = vector_index.as_query_engine(\n",
469 |         "    similarity_top_k=10,\n",
470 |         "    node_postprocessors=[cohere_rerank],\n",
471 |         ")"
472 |       ],
473 |       "metadata": {
474 |         "id": "qpU4Qwo3lgns"
475 |       },
476 |       "execution_count": null,
477 |       "outputs": []
478 |     },
479 |     {
480 |       "cell_type": "code",
481 |       "source": [
482 |         "response = query_engine.query(\n",
483 |         "    \"What did Sam Altman do in this essay?\",\n",
484 |         ")\n",
485 |         "print( response )"
486 |       ],
487 |       "metadata": {
488 |         "colab": {
489 |           "base_uri": "https://localhost:8080/"
490 |         },
491 |         "id": "9uvVyML8lgkx",
492 |         "outputId": "d29ad8d4-cd75-477c-a0fc-3272d9ff0490"
493 |       },
494 |       "execution_count": null,
495 |       "outputs": [
496 |         {
497 |           "output_type": "stream",
498 |           "name": "stdout",
499 |           "text": [
500 |             "Sam Altman was asked if he wanted to be the president of Y Combinator (YC) and initially said no. However, after persistent persuasion, he eventually agreed to take over as president starting with the winter 2014 batch.\n"
501 |           ]
502 |         }
503 |       ]
504 |     },
505 |     {
506 |       "cell_type": "code",
507 |       "source": [],
508 |       "metadata": {
509 |         "id": "9rXR7WjalgiL"
510 |       },
511 |       "execution_count": null,
512 |       "outputs": []
513 |     },
514 |     {
515 |       "cell_type": "code",
516 |       "source": [],
517 |       "metadata": {
518 |         "id": "QDx16J7Flgfa"
519 |       },
520 |       "execution_count": null,
521 |       "outputs": []
522 |     }
523 |   ]
524 | }


--------------------------------------------------------------------------------
/notebooks/Chapter 09 - Building Agents for Analysis Report Creation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": []
  7 |     },
  8 |     "kernelspec": {
  9 |       "name": "python3",
 10 |       "display_name": "Python 3"
 11 |     },
 12 |     "language_info": {
 13 |       "name": "python"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "cell_type": "markdown",
 19 |       "source": [
 20 |         "# Building Autonomous Agents to Create Analysis Reports"
 21 |       ],
 22 |       "metadata": {
 23 |         "id": "v-FzkIqP_u6u"
 24 |       }
 25 |     },
 26 |     {
 27 |       "cell_type": "code",
 28 |       "source": [
 29 |         "import os\n",
 30 |         "\n",
 31 |         "os.environ[\"OPENAI_API_KEY\"] = \"<YOUR-OPENAI-API-KEY>\"\n",
 32 |         "os.environ[\"ACTIVELOOP_TOKEN\"] = \"<YOUR-ACTIVELOOP-TOKEN>\""
 33 |       ],
 34 |       "metadata": {
 35 |         "id": "yAmImWVk_t_W"
 36 |       },
 37 |       "execution_count": null,
 38 |       "outputs": []
 39 |     },
 40 |     {
 41 |       "cell_type": "code",
 42 |       "source": [
 43 |         "# We scrape several Artificial Intelligence news\n",
 44 |         "\n",
 45 |         "import requests\n",
 46 |         "from newspaper import Article # https://github.com/codelucas/newspaper\n",
 47 |         "import time\n",
 48 |         "\n",
 49 |         "headers = {\n",
 50 |         "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'\n",
 51 |         "}\n",
 52 |         "\n",
 53 |         "article_urls = [\n",
 54 |         "    \"https://www.artificialintelligence-news.com/2023/05/23/meta-open-source-speech-ai-models-support-over-1100-languages/\",\n",
 55 |         "    \"https://www.artificialintelligence-news.com/2023/05/18/beijing-launches-campaign-against-ai-generated-misinformation/\"\n",
 56 |         "    \"https://www.artificialintelligence-news.com/2023/05/16/openai-ceo-ai-regulation-is-essential/\",\n",
 57 |         "    \"https://www.artificialintelligence-news.com/2023/05/15/jay-migliaccio-ibm-watson-on-leveraging-ai-to-improve-productivity/\",\n",
 58 |         "    \"https://www.artificialintelligence-news.com/2023/05/15/iurii-milovanov-softserve-how-ai-ml-is-helping-boost-innovation-and-personalisation/\",\n",
 59 |         "    \"https://www.artificialintelligence-news.com/2023/05/11/ai-and-big-data-expo-north-america-begins-in-less-than-one-week/\",\n",
 60 |         "    \"https://www.artificialintelligence-news.com/2023/05/11/eu-committees-green-light-ai-act/\",\n",
 61 |         "    \"https://www.artificialintelligence-news.com/2023/05/09/wozniak-warns-ai-will-power-next-gen-scams/\",\n",
 62 |         "    \"https://www.artificialintelligence-news.com/2023/05/09/infocepts-ceo-shashank-garg-on-the-da-market-shifts-and-impact-of-ai-on-data-analytics/\",\n",
 63 |         "    \"https://www.artificialintelligence-news.com/2023/05/02/ai-godfather-warns-dangers-and-quits-google/\",\n",
 64 |         "    \"https://www.artificialintelligence-news.com/2023/04/28/palantir-demos-how-ai-can-used-military/\",\n",
 65 |         "    \"https://www.artificialintelligence-news.com/2023/04/26/ftc-chairwoman-no-ai-exemption-to-existing-laws/\",\n",
 66 |         "    \"https://www.artificialintelligence-news.com/2023/04/24/bill-gates-ai-teaching-kids-literacy-within-18-months/\",\n",
 67 |         "    \"https://www.artificialintelligence-news.com/2023/04/21/google-creates-new-ai-division-to-challenge-openai/\"\n",
 68 |         "]\n",
 69 |         "\n",
 70 |         "session = requests.Session()\n",
 71 |         "pages_content = [] # where we save the scraped articles\n",
 72 |         "\n",
 73 |         "for url in article_urls:\n",
 74 |         "    try:\n",
 75 |         "        time.sleep(2) # sleep two seconds for gentle scraping\n",
 76 |         "        response = session.get(url, headers=headers, timeout=10)\n",
 77 |         "\n",
 78 |         "        if response.status_code == 200:\n",
 79 |         "            article = Article(url)\n",
 80 |         "            article.download() # download HTML of webpage\n",
 81 |         "            article.parse() # parse HTML to extract the article text\n",
 82 |         "            pages_content.append({ \"url\": url, \"text\": article.text })\n",
 83 |         "        else:\n",
 84 |         "            print(f\"Failed to fetch article at {url}\")\n",
 85 |         "    except Exception as e:\n",
 86 |         "        print(f\"Error occurred while fetching article at {url}: {e}\")\n",
 87 |         "\n",
 88 |         "#If an error occurs while fetching an article, we catch the exception and print\n",
 89 |         "#an error message. This ensures that even if one article fails to download,\n",
 90 |         "#the rest of the articles can still be processed."
 91 |       ],
 92 |       "metadata": {
 93 |         "id": "1GEQJGYI_uOb"
 94 |       },
 95 |       "execution_count": null,
 96 |       "outputs": []
 97 |     },
 98 |     {
 99 |       "cell_type": "code",
100 |       "source": [
101 |         "# We'll use an embedding model to compute our documents' embeddings\n",
102 |         "from langchain.embeddings.openai import OpenAIEmbeddings\n",
103 |         "\n",
104 |         "# We'll store the documents and their embeddings in the deep lake vector db\n",
105 |         "from langchain.vectorstores import DeepLake\n",
106 |         "\n",
107 |         "# Setup deep lake\n",
108 |         "embeddings = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
109 |         "\n",
110 |         "# create Deep Lake dataset\n",
111 |         "# TODO: use your organization id here. (by default, org id is your username)\n",
112 |         "my_activeloop_org_id = \"<YOUR-ACTIVELOOP-ORG-ID>\"\n",
113 |         "my_activeloop_dataset_name = \"langchain_course_analysis_outline\"\n",
114 |         "dataset_path = f\"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}\"\n",
115 |         "db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)"
116 |       ],
117 |       "metadata": {
118 |         "id": "TodXWoGl_uWk"
119 |       },
120 |       "execution_count": null,
121 |       "outputs": []
122 |     },
123 |     {
124 |       "cell_type": "code",
125 |       "source": [
126 |         "# We split the article texts into small chunks\n",
127 |         "\n",
128 |         "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
129 |         "\n",
130 |         "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)\n",
131 |         "\n",
132 |         "all_texts = []\n",
133 |         "for d in pages_content:\n",
134 |         "    chunks = text_splitter.split_text(d[\"text\"])\n",
135 |         "    for chunk in chunks:\n",
136 |         "        all_texts.append(chunk)"
137 |       ],
138 |       "metadata": {
139 |         "id": "eqQJhYeO_uY8"
140 |       },
141 |       "execution_count": null,
142 |       "outputs": []
143 |     },
144 |     {
145 |       "cell_type": "code",
146 |       "source": [
147 |         "# we add all the chunks to the Deep lake\n",
148 |         "db.add_texts(all_texts)"
149 |       ],
150 |       "metadata": {
151 |         "id": "1YIGjRKU_7ZC"
152 |       },
153 |       "execution_count": null,
154 |       "outputs": []
155 |     },
156 |     {
157 |       "cell_type": "code",
158 |       "source": [
159 |         "# Get the retriever object from the deep lake db object and set the number\n",
160 |         "# of retrieved documents to 3\n",
161 |         "retriever = db.as_retriever()\n",
162 |         "retriever.search_kwargs['k'] = 3\n",
163 |         "\n",
164 |         "# We define some variables that will be used inside our custom tool\n",
165 |         "CUSTOM_TOOL_DOCS_SEPARATOR =\"\\n---------------\\n\" # how to join together the retrieved docs to form a single string\n",
166 |         "\n",
167 |         "# This is the function that defines our custom tool that retrieves relevant\n",
168 |         "# docs from Deep Lake\n",
169 |         "def retrieve_n_docs_tool(query: str) -> str:\n",
170 |         "    \"\"\"Searches for relevant documents that may contain the answer to the query.\"\"\"\n",
171 |         "    docs = retriever.get_relevant_documents(query)\n",
172 |         "    texts = [doc.page_content for doc in docs]\n",
173 |         "    texts_merged = \"---------------\\n\" + CUSTOM_TOOL_DOCS_SEPARATOR.join(texts) + \"\\n---------------\"\n",
174 |         "    return texts_merged"
175 |       ],
176 |       "metadata": {
177 |         "id": "bXPGNQ1q_8vy"
178 |       },
179 |       "execution_count": null,
180 |       "outputs": []
181 |     },
182 |     {
183 |       "cell_type": "code",
184 |       "source": [
185 |         "from langchain.agents.tools import Tool\n",
186 |         "\n",
187 |         "# We create the tool that uses the \"retrieve_n_docs_tool\" function\n",
188 |         "tools = [\n",
189 |         "    Tool(\n",
190 |         "        name=\"Search Private Docs\",\n",
191 |         "        func=retrieve_n_docs_tool,\n",
192 |         "        description=\"useful for when you need to answer questions about current events about Artificial Intelligence\"\n",
193 |         "    )\n",
194 |         "]"
195 |       ],
196 |       "metadata": {
197 |         "id": "36ZfLG8O_7dc"
198 |       },
199 |       "execution_count": null,
200 |       "outputs": []
201 |     },
202 |     {
203 |       "cell_type": "code",
204 |       "source": [
205 |         "from langchain.chat_models import ChatOpenAI\n",
206 |         "from langchain.experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner\n",
207 |         "\n",
208 |         "# let's create the Plan and Execute agent\n",
209 |         "model = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
210 |         "planner = load_chat_planner(model)\n",
211 |         "executor = load_agent_executor(model, tools, verbose=True)\n",
212 |         "agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)"
213 |       ],
214 |       "metadata": {
215 |         "id": "8_TNPpwo_7fv"
216 |       },
217 |       "execution_count": null,
218 |       "outputs": []
219 |     },
220 |     {
221 |       "cell_type": "code",
222 |       "source": [
223 |         "# we test the agent\n",
224 |         "response = agent.run(\"Write an overview of Artificial Intelligence regulations by governments by country\")"
225 |       ],
226 |       "metadata": {
227 |         "id": "TjVjCYZJ_7iP"
228 |       },
229 |       "execution_count": null,
230 |       "outputs": []
231 |     },
232 |     {
233 |       "cell_type": "code",
234 |       "source": [
235 |         "print(response)"
236 |       ],
237 |       "metadata": {
238 |         "id": "QBZfIjTW_7px"
239 |       },
240 |       "execution_count": null,
241 |       "outputs": []
242 |     }
243 |   ]
244 | }


--------------------------------------------------------------------------------
/notebooks/Chapter 09 - LlamaIndex_RAG_AGENT.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyM9sDLxksL6ksDotmbqi3Ru",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2009%20-%20LlamaIndex_RAG_AGENT.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "source": [
 32 |         "!pip install -q llama-index==0.9.14.post3 deeplake==3.8.8 openai==1.3.8 cohere==4.37"
 33 |       ],
 34 |       "metadata": {
 35 |         "id": "oLsz4honE_jq"
 36 |       },
 37 |       "execution_count": null,
 38 |       "outputs": []
 39 |     },
 40 |     {
 41 |       "cell_type": "code",
 42 |       "source": [
 43 |         "import os\n",
 44 |         "\n",
 45 |         "os.environ['OPENAI_API_KEY'] = '<YOUR_OPENAI_API_KEY>'\n",
 46 |         "os.environ['ACTIVELOOP_TOKEN'] = '<YOUR_ACTIVELOOP_API_KEY>'"
 47 |       ],
 48 |       "metadata": {
 49 |         "id": "Uh_M0Z0FFJPb"
 50 |       },
 51 |       "execution_count": null,
 52 |       "outputs": []
 53 |     },
 54 |     {
 55 |       "cell_type": "markdown",
 56 |       "source": [
 57 |         "# Prepare Indexes"
 58 |       ],
 59 |       "metadata": {
 60 |         "id": "e4xCp6-mN3xw"
 61 |       }
 62 |     },
 63 |     {
 64 |       "cell_type": "code",
 65 |       "source": [
 66 |         "!mkdir -p 'data/1k/'\n",
 67 |         "!wget 'https://github.com/idontcalculate/data-repo/blob/main/machine_to_end_war.txt' -O './data/1k/tesla.txt'\n",
 68 |         "!wget 'https://github.com/idontcalculate/data-repo/blob/main/prodigal_chapter10.txt' -O './data/1k/web.txt'"
 69 |       ],
 70 |       "metadata": {
 71 |         "colab": {
 72 |           "base_uri": "https://localhost:8080/"
 73 |         },
 74 |         "id": "mE86KzdvOPgX",
 75 |         "outputId": "becf09cb-35a4-4c6c-f26a-6cdb41fbacd5"
 76 |       },
 77 |       "execution_count": null,
 78 |       "outputs": [
 79 |         {
 80 |           "output_type": "stream",
 81 |           "name": "stdout",
 82 |           "text": [
 83 |             "--2023-12-15 16:23:50--  https://github.com/idontcalculate/data-repo/blob/main/machine_to_end_war.txt\n",
 84 |             "Resolving github.com (github.com)... 140.82.113.3\n",
 85 |             "Connecting to github.com (github.com)|140.82.113.3|:443... connected.\n",
 86 |             "HTTP request sent, awaiting response... 200 OK\n",
 87 |             "Length: 18616 (18K) [text/plain]\n",
 88 |             "Saving to: ‘./data/1k/tesla.txt’\n",
 89 |             "\n",
 90 |             "./data/1k/tesla.txt 100%[===================>]  18.18K  --.-KB/s    in 0.04s   \n",
 91 |             "\n",
 92 |             "2023-12-15 16:23:50 (424 KB/s) - ‘./data/1k/tesla.txt’ saved [18616/18616]\n",
 93 |             "\n",
 94 |             "--2023-12-15 16:23:50--  https://github.com/idontcalculate/data-repo/blob/main/prodigal_chapter10.txt\n",
 95 |             "Resolving github.com (github.com)... 140.82.113.4\n",
 96 |             "Connecting to github.com (github.com)|140.82.113.4|:443... connected.\n",
 97 |             "HTTP request sent, awaiting response... 200 OK\n",
 98 |             "Length: 28197 (28K) [text/plain]\n",
 99 |             "Saving to: ‘./data/1k/web.txt’\n",
100 |             "\n",
101 |             "./data/1k/web.txt   100%[===================>]  27.54K  --.-KB/s    in 0.04s   \n",
102 |             "\n",
103 |             "2023-12-15 16:23:51 (628 KB/s) - ‘./data/1k/web.txt’ saved [28197/28197]\n",
104 |             "\n"
105 |           ]
106 |         }
107 |       ]
108 |     },
109 |     {
110 |       "cell_type": "markdown",
111 |       "source": [
112 |         "### From VectorStore"
113 |       ],
114 |       "metadata": {
115 |         "id": "xAXcBI5COHWy"
116 |       }
117 |     },
118 |     {
119 |       "cell_type": "code",
120 |       "source": [
121 |         "from llama_index import SimpleDirectoryReader\n",
122 |         "\n",
123 |         "tesla_docs = SimpleDirectoryReader( input_files=[\"/content/data/1k/tesla.txt\"] ).load_data()"
124 |       ],
125 |       "metadata": {
126 |         "id": "ufXfJwfcORAa"
127 |       },
128 |       "execution_count": null,
129 |       "outputs": []
130 |     },
131 |     {
132 |       "cell_type": "code",
133 |       "source": [
134 |         "from llama_index.vector_stores import DeepLakeVectorStore\n",
135 |         "\n",
136 |         "my_activeloop_org_id = \"genai360\"\n",
137 |         "my_activeloop_dataset_name = \"LlamaIndex_tesla_predictions\"\n",
138 |         "dataset_path = f\"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}\"\n",
139 |         "\n",
140 |         "# Create an index over the documnts\n",
141 |         "vector_store = DeepLakeVectorStore(dataset_path=dataset_path, overwrite=False)"
142 |       ],
143 |       "metadata": {
144 |         "colab": {
145 |           "base_uri": "https://localhost:8080/"
146 |         },
147 |         "id": "--NnpT4UOQbv",
148 |         "outputId": "00b40ba0-ae85-412d-c897-8d5ce317b172"
149 |       },
150 |       "execution_count": null,
151 |       "outputs": [
152 |         {
153 |           "output_type": "stream",
154 |           "name": "stderr",
155 |           "text": [
156 |             "/usr/local/lib/python3.10/dist-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.8.12) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
157 |             "  warnings.warn(\n"
158 |           ]
159 |         },
160 |         {
161 |           "output_type": "stream",
162 |           "name": "stdout",
163 |           "text": [
164 |             "Your Deep Lake dataset has been successfully created!\n"
165 |           ]
166 |         },
167 |         {
168 |           "output_type": "stream",
169 |           "name": "stderr",
170 |           "text": []
171 |         }
172 |       ]
173 |     },
174 |     {
175 |       "cell_type": "code",
176 |       "source": [
177 |         "from llama_index.storage.storage_context import StorageContext\n",
178 |         "\n",
179 |         "storage_context = StorageContext.from_defaults(vector_store=vector_store)"
180 |       ],
181 |       "metadata": {
182 |         "id": "loC5XGShPtf_"
183 |       },
184 |       "execution_count": null,
185 |       "outputs": []
186 |     },
187 |     {
188 |       "cell_type": "code",
189 |       "source": [
190 |         "from llama_index import VectorStoreIndex\n",
191 |         "\n",
192 |         "tesla_index = VectorStoreIndex.from_documents(tesla_docs, storage_context=storage_context)"
193 |       ],
194 |       "metadata": {
195 |         "colab": {
196 |           "base_uri": "https://localhost:8080/"
197 |         },
198 |         "id": "7pPld5m3Ptco",
199 |         "outputId": "8b9f21fd-0780-4d61-8a6e-97d524d68232"
200 |       },
201 |       "execution_count": null,
202 |       "outputs": [
203 |         {
204 |           "output_type": "stream",
205 |           "name": "stdout",
206 |           "text": [
207 |             "Uploading data to deeplake dataset.\n"
208 |           ]
209 |         },
210 |         {
211 |           "output_type": "stream",
212 |           "name": "stderr",
213 |           "text": [
214 |             "100%|██████████| 5/5 [00:00<00:00,  7.17it/s]\n",
215 |             "/"
216 |           ]
217 |         },
218 |         {
219 |           "output_type": "stream",
220 |           "name": "stdout",
221 |           "text": [
222 |             "Dataset(path='hub://genai360/LlamaIndex_tesla_predictions', tensors=['text', 'metadata', 'embedding', 'id'])\n",
223 |             "\n",
224 |             "  tensor      htype      shape     dtype  compression\n",
225 |             "  -------    -------    -------   -------  ------- \n",
226 |             "   text       text      (5, 1)      str     None   \n",
227 |             " metadata     json      (5, 1)      str     None   \n",
228 |             " embedding  embedding  (5, 1536)  float32   None   \n",
229 |             "    id        text      (5, 1)      str     None   \n"
230 |           ]
231 |         },
232 |         {
233 |           "output_type": "stream",
234 |           "name": "stderr",
235 |           "text": [
236 |             "\r \r"
237 |           ]
238 |         }
239 |       ]
240 |     },
241 |     {
242 |       "cell_type": "markdown",
243 |       "source": [
244 |         "## From Local Index"
245 |       ],
246 |       "metadata": {
247 |         "id": "GIQLCDvuQPPU"
248 |       }
249 |     },
250 |     {
251 |       "cell_type": "code",
252 |       "source": [
253 |         "webtext_docs = SimpleDirectoryReader( input_files=[\"/content/data/1k/web.txt\"] ).load_data()"
254 |       ],
255 |       "metadata": {
256 |         "id": "7lPWnGE_PtaR"
257 |       },
258 |       "execution_count": null,
259 |       "outputs": []
260 |     },
261 |     {
262 |       "cell_type": "code",
263 |       "source": [
264 |         "try:\n",
265 |         "  # Try to load the index if it is already calculated\n",
266 |         "  storage_context = StorageContext.from_defaults( persist_dir=\"/content/storage/webtext\" )\n",
267 |         "  webtext_index = load_index_from_storage(storage_context)\n",
268 |         "  print(\"Loaded the pre-computed index.\")\n",
269 |         "except:\n",
270 |         "  # Otherwise, generate the indexes\n",
271 |         "  webtext_index = VectorStoreIndex.from_documents(webtext_docs)\n",
272 |         "  webtext_index.storage_context.persist(persist_dir=\"/content/storage/webtext\")\n",
273 |         "  print(\"Generated the index.\")"
274 |       ],
275 |       "metadata": {
276 |         "colab": {
277 |           "base_uri": "https://localhost:8080/"
278 |         },
279 |         "id": "P1ieq9i8QUa1",
280 |         "outputId": "f9da5d43-fbea-494c-9751-21134dc4156c"
281 |       },
282 |       "execution_count": null,
283 |       "outputs": [
284 |         {
285 |           "output_type": "stream",
286 |           "name": "stdout",
287 |           "text": [
288 |             "Generated the index.\n"
289 |           ]
290 |         }
291 |       ]
292 |     },
293 |     {
294 |       "cell_type": "markdown",
295 |       "source": [
296 |         "# Create Query Enginges"
297 |       ],
298 |       "metadata": {
299 |         "id": "dF3KH4bHRU9L"
300 |       }
301 |     },
302 |     {
303 |       "cell_type": "code",
304 |       "source": [
305 |         "tesla_engine = tesla_index.as_query_engine(similarity_top_k=3)\n",
306 |         "webtext_engine = webtext_index.as_query_engine(similarity_top_k=3)"
307 |       ],
308 |       "metadata": {
309 |         "id": "MR4BXAVqQnV8"
310 |       },
311 |       "execution_count": null,
312 |       "outputs": []
313 |     },
314 |     {
315 |       "cell_type": "markdown",
316 |       "source": [
317 |         "# Create the Tools"
318 |       ],
319 |       "metadata": {
320 |         "id": "YFbYTky1Rgu0"
321 |       }
322 |     },
323 |     {
324 |       "cell_type": "code",
325 |       "source": [
326 |         "from llama_index.tools import QueryEngineTool, ToolMetadata\n",
327 |         "\n",
328 |         "query_engine_tools = [\n",
329 |         "    QueryEngineTool(\n",
330 |         "        query_engine=tesla_engine,\n",
331 |         "        metadata=ToolMetadata(\n",
332 |         "            name=\"tesla_1k\",\n",
333 |         "            description=(\n",
334 |         "                \"Provides information about Tesla's statements that refers to future times and predictions. \"\n",
335 |         "                \"Use a detailed plain text question as input to the tool.\"\n",
336 |         "            ),\n",
337 |         "        ),\n",
338 |         "    ),\n",
339 |         "    QueryEngineTool(\n",
340 |         "        query_engine=webtext_engine,\n",
341 |         "        metadata=ToolMetadata(\n",
342 |         "            name=\"webtext_1k\",\n",
343 |         "            description=(\n",
344 |         "                \"Provides information about tesla's life and biographical data. \"\n",
345 |         "                \"Use a detailed plain text question as input to the tool.\"\n",
346 |         "            ),\n",
347 |         "        ),\n",
348 |         "    ),\n",
349 |         "]"
350 |       ],
351 |       "metadata": {
352 |         "id": "vMptZI0uPtVU"
353 |       },
354 |       "execution_count": null,
355 |       "outputs": []
356 |     },
357 |     {
358 |       "cell_type": "markdown",
359 |       "source": [
360 |         "# Define the Agent"
361 |       ],
362 |       "metadata": {
363 |         "id": "0CU3MaJ4Rt4R"
364 |       }
365 |     },
366 |     {
367 |       "cell_type": "code",
368 |       "source": [
369 |         "from llama_index.agent import OpenAIAgent\n",
370 |         "agent = OpenAIAgent.from_tools(query_engine_tools, verbose=True)"
371 |       ],
372 |       "metadata": {
373 |         "id": "RdBtZi50PtSe"
374 |       },
375 |       "execution_count": null,
376 |       "outputs": []
377 |     },
378 |     {
379 |       "cell_type": "code",
380 |       "source": [
381 |         "agent.chat_repl()"
382 |       ],
383 |       "metadata": {
384 |         "colab": {
385 |           "base_uri": "https://localhost:8080/"
386 |         },
387 |         "id": "M-0kHgKCRiq0",
388 |         "outputId": "8ab2c744-e818-46ec-be40-da17fb1d11f0"
389 |       },
390 |       "execution_count": null,
391 |       "outputs": [
392 |         {
393 |           "name": "stdout",
394 |           "output_type": "stream",
395 |           "text": [
396 |             "===== Entering Chat REPL =====\n",
397 |             "Type \"exit\" to exit.\n",
398 |             "\n",
399 |             "Human: What influenced Nikola Tesla to become an inventor?\n",
400 |             "STARTING TURN 1\n",
401 |             "---------------\n",
402 |             "\n",
403 |             "=== Calling Function ===\n",
404 |             "Calling function: webtext_1k with args: {\n",
405 |             "\"input\": \"What influenced Nikola Tesla to become an inventor?\"\n",
406 |             "}\n",
407 |             "Got output: Nikola Tesla was influenced to become an inventor by his studies of mechanical vibrations. He observed the selective response of objects to vibrations and realized the potential for producing effects of tremendous magnitude on physical objects. This led him to pursue research in the field of high-frequency and high-potential currents, which eventually resulted in his groundbreaking inventions.\n",
408 |             "========================\n",
409 |             "\n",
410 |             "STARTING TURN 2\n",
411 |             "---------------\n",
412 |             "\n",
413 |             "Assistant: Nikola Tesla was influenced to become an inventor by his studies of mechanical vibrations. He observed the selective response of objects to vibrations and realized the potential for producing effects of tremendous magnitude on physical objects. This led him to pursue research in the field of high-frequency and high-potential currents, which eventually resulted in his groundbreaking inventions.\n",
414 |             "\n",
415 |             "Human: exit\n"
416 |           ]
417 |         }
418 |       ]
419 |     },
420 |     {
421 |       "cell_type": "markdown",
422 |       "source": [
423 |         "# Agents with Tools"
424 |       ],
425 |       "metadata": {
426 |         "id": "AGakE3sO1F_5"
427 |       }
428 |     },
429 |     {
430 |       "cell_type": "code",
431 |       "source": [
432 |         "from llama_index.tools import FunctionTool\n",
433 |         "\n",
434 |         "def multiply(a: int, b: int) -> int:\n",
435 |         "    \"\"\"Multiply two integers and returns the result integer\"\"\"\n",
436 |         "    return a * b\n",
437 |         "\n",
438 |         "\n",
439 |         "def add(a: int, b: int) -> int:\n",
440 |         "    \"\"\"Add two integers and returns the result integer\"\"\"\n",
441 |         "    return a + b\n",
442 |         "\n",
443 |         "\n",
444 |         "multiply_tool = FunctionTool.from_defaults(fn=multiply, name=\"multiply\")\n",
445 |         "add_tool = FunctionTool.from_defaults(fn=add, name=\"add\")\n",
446 |         "\n",
447 |         "all_tools = [multiply_tool, add_tool]"
448 |       ],
449 |       "metadata": {
450 |         "id": "xwTwYSSjRihM"
451 |       },
452 |       "execution_count": null,
453 |       "outputs": []
454 |     },
455 |     {
456 |       "cell_type": "code",
457 |       "source": [
458 |         "from llama_index import VectorStoreIndex\n",
459 |         "from llama_index.objects import ObjectIndex, SimpleToolNodeMapping\n",
460 |         "\n",
461 |         "tool_mapping = SimpleToolNodeMapping.from_objects(all_tools)\n",
462 |         "obj_index = ObjectIndex.from_objects(\n",
463 |         "    all_tools,\n",
464 |         "    tool_mapping,\n",
465 |         "    VectorStoreIndex,\n",
466 |         ")"
467 |       ],
468 |       "metadata": {
469 |         "id": "gRNaPGDyRieS"
470 |       },
471 |       "execution_count": null,
472 |       "outputs": []
473 |     },
474 |     {
475 |       "cell_type": "code",
476 |       "source": [
477 |         "from llama_index.agent import FnRetrieverOpenAIAgent\n",
478 |         "\n",
479 |         "agent = FnRetrieverOpenAIAgent.from_retriever(\n",
480 |         "    obj_index.as_retriever(), verbose=True\n",
481 |         ")"
482 |       ],
483 |       "metadata": {
484 |         "id": "GNHxglUDKJ5L"
485 |       },
486 |       "execution_count": null,
487 |       "outputs": []
488 |     },
489 |     {
490 |       "cell_type": "code",
491 |       "source": [
492 |         "agent.chat(\"What's 12 multiplied by 22? Make sure to use Tools\")"
493 |       ],
494 |       "metadata": {
495 |         "colab": {
496 |           "base_uri": "https://localhost:8080/"
497 |         },
498 |         "id": "uSCd47OB2ZIN",
499 |         "outputId": "665317f3-6710-4a72-f940-96fd50800ef0"
500 |       },
501 |       "execution_count": null,
502 |       "outputs": [
503 |         {
504 |           "output_type": "stream",
505 |           "name": "stdout",
506 |           "text": [
507 |             "STARTING TURN 1\n",
508 |             "---------------\n",
509 |             "\n",
510 |             "=== Calling Function ===\n",
511 |             "Calling function: multiply with args: {\n",
512 |             "  \"a\": 12,\n",
513 |             "  \"b\": 22\n",
514 |             "}\n",
515 |             "Got output: 264\n",
516 |             "========================\n",
517 |             "\n",
518 |             "STARTING TURN 2\n",
519 |             "---------------\n",
520 |             "\n"
521 |           ]
522 |         },
523 |         {
524 |           "output_type": "execute_result",
525 |           "data": {
526 |             "text/plain": [
527 |               "AgentChatResponse(response='12 multiplied by 22 is 264.', sources=[ToolOutput(content='264', tool_name='multiply', raw_input={'args': (), 'kwargs': {'a': 12, 'b': 22}}, raw_output=264)], source_nodes=[])"
528 |             ]
529 |           },
530 |           "metadata": {},
531 |           "execution_count": 32
532 |         }
533 |       ]
534 |     },
535 |     {
536 |       "cell_type": "code",
537 |       "source": [
538 |         "agent.chat( \"What is 5 + 2?\", tool_choice=\"add\" )"
539 |       ],
540 |       "metadata": {
541 |         "colab": {
542 |           "base_uri": "https://localhost:8080/"
543 |         },
544 |         "id": "btL4lvUy2ZBK",
545 |         "outputId": "6b36a503-321e-489f-8101-e20890367ddb"
546 |       },
547 |       "execution_count": null,
548 |       "outputs": [
549 |         {
550 |           "output_type": "stream",
551 |           "name": "stdout",
552 |           "text": [
553 |             "STARTING TURN 1\n",
554 |             "---------------\n",
555 |             "\n",
556 |             "=== Calling Function ===\n",
557 |             "Calling function: add with args: {\n",
558 |             "  \"a\": 5,\n",
559 |             "  \"b\": 2\n",
560 |             "}\n",
561 |             "Got output: 7\n",
562 |             "========================\n",
563 |             "\n",
564 |             "STARTING TURN 2\n",
565 |             "---------------\n",
566 |             "\n"
567 |           ]
568 |         },
569 |         {
570 |           "output_type": "execute_result",
571 |           "data": {
572 |             "text/plain": [
573 |               "AgentChatResponse(response='5 + 2 is equal to 7.', sources=[ToolOutput(content='7', tool_name='add', raw_input={'args': (), 'kwargs': {'a': 5, 'b': 2}}, raw_output=7)], source_nodes=[])"
574 |             ]
575 |           },
576 |           "metadata": {},
577 |           "execution_count": 35
578 |         }
579 |       ]
580 |     },
581 |     {
582 |       "cell_type": "code",
583 |       "source": [],
584 |       "metadata": {
585 |         "id": "GHfXq66zEZ7B"
586 |       },
587 |       "execution_count": null,
588 |       "outputs": []
589 |     }
590 |   ]
591 | }


--------------------------------------------------------------------------------
/notebooks/Chapter 09 - Query and Zummarize a DB with LlamaIndex.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": []
  7 |     },
  8 |     "kernelspec": {
  9 |       "name": "python3",
 10 |       "display_name": "Python 3"
 11 |     },
 12 |     "language_info": {
 13 |       "name": "python"
 14 |     }
 15 |   },
 16 |   "cells": [
 17 |     {
 18 |       "cell_type": "code",
 19 |       "source": [
 20 |         "!pip install -q llama-index==0.9.14.post3 deeplake==3.8.8 openai==1.3.8 cohere==4.37"
 21 |       ],
 22 |       "metadata": {
 23 |         "id": "oLsz4honE_jq"
 24 |       },
 25 |       "execution_count": null,
 26 |       "outputs": []
 27 |     },
 28 |     {
 29 |       "cell_type": "code",
 30 |       "source": [
 31 |         "import os\n",
 32 |         "\n",
 33 |         "os.environ['OPENAI_API_KEY'] = '<YOUR_OPENAI_API_KEY>'\n",
 34 |         "os.environ['ACTIVELOOP_TOKEN'] = '<YOUR_ACTIVELOOP_API_KEY>'"
 35 |       ],
 36 |       "metadata": {
 37 |         "id": "Uh_M0Z0FFJPb"
 38 |       },
 39 |       "execution_count": null,
 40 |       "outputs": []
 41 |     },
 42 |     {
 43 |       "cell_type": "markdown",
 44 |       "source": [
 45 |         "# Prepare Indexes"
 46 |       ],
 47 |       "metadata": {
 48 |         "id": "e4xCp6-mN3xw"
 49 |       }
 50 |     },
 51 |     {
 52 |       "cell_type": "code",
 53 |       "source": [
 54 |         "!mkdir -p 'data/1k/'\n",
 55 |         "!wget 'https://github.com/idontcalculate/data-repo/blob/main/machine_to_end_war.txt' -O './data/1k/tesla.txt'\n",
 56 |         "!wget 'https://github.com/idontcalculate/data-repo/blob/main/prodigal_chapter10.txt' -O './data/1k/web.txt'"
 57 |       ],
 58 |       "metadata": {
 59 |         "colab": {
 60 |           "base_uri": "https://localhost:8080/"
 61 |         },
 62 |         "id": "mE86KzdvOPgX",
 63 |         "outputId": "becf09cb-35a4-4c6c-f26a-6cdb41fbacd5"
 64 |       },
 65 |       "execution_count": null,
 66 |       "outputs": [
 67 |         {
 68 |           "output_type": "stream",
 69 |           "name": "stdout",
 70 |           "text": [
 71 |             "--2023-12-15 16:23:50--  https://github.com/idontcalculate/data-repo/blob/main/machine_to_end_war.txt\n",
 72 |             "Resolving github.com (github.com)... 140.82.113.3\n",
 73 |             "Connecting to github.com (github.com)|140.82.113.3|:443... connected.\n",
 74 |             "HTTP request sent, awaiting response... 200 OK\n",
 75 |             "Length: 18616 (18K) [text/plain]\n",
 76 |             "Saving to: ‘./data/1k/tesla.txt’\n",
 77 |             "\n",
 78 |             "./data/1k/tesla.txt 100%[===================>]  18.18K  --.-KB/s    in 0.04s   \n",
 79 |             "\n",
 80 |             "2023-12-15 16:23:50 (424 KB/s) - ‘./data/1k/tesla.txt’ saved [18616/18616]\n",
 81 |             "\n",
 82 |             "--2023-12-15 16:23:50--  https://github.com/idontcalculate/data-repo/blob/main/prodigal_chapter10.txt\n",
 83 |             "Resolving github.com (github.com)... 140.82.113.4\n",
 84 |             "Connecting to github.com (github.com)|140.82.113.4|:443... connected.\n",
 85 |             "HTTP request sent, awaiting response... 200 OK\n",
 86 |             "Length: 28197 (28K) [text/plain]\n",
 87 |             "Saving to: ‘./data/1k/web.txt’\n",
 88 |             "\n",
 89 |             "./data/1k/web.txt   100%[===================>]  27.54K  --.-KB/s    in 0.04s   \n",
 90 |             "\n",
 91 |             "2023-12-15 16:23:51 (628 KB/s) - ‘./data/1k/web.txt’ saved [28197/28197]\n",
 92 |             "\n"
 93 |           ]
 94 |         }
 95 |       ]
 96 |     },
 97 |     {
 98 |       "cell_type": "markdown",
 99 |       "source": [
100 |         "### From VectorStore"
101 |       ],
102 |       "metadata": {
103 |         "id": "xAXcBI5COHWy"
104 |       }
105 |     },
106 |     {
107 |       "cell_type": "code",
108 |       "source": [
109 |         "from llama_index import SimpleDirectoryReader\n",
110 |         "\n",
111 |         "tesla_docs = SimpleDirectoryReader( input_files=[\"/content/data/1k/tesla.txt\"] ).load_data()"
112 |       ],
113 |       "metadata": {
114 |         "id": "ufXfJwfcORAa"
115 |       },
116 |       "execution_count": null,
117 |       "outputs": []
118 |     },
119 |     {
120 |       "cell_type": "code",
121 |       "source": [
122 |         "from llama_index.vector_stores import DeepLakeVectorStore\n",
123 |         "\n",
124 |         "my_activeloop_org_id = \"genai360\"\n",
125 |         "my_activeloop_dataset_name = \"LlamaIndex_tesla_predictions\"\n",
126 |         "dataset_path = f\"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}\"\n",
127 |         "\n",
128 |         "# Create an index over the documnts\n",
129 |         "vector_store = DeepLakeVectorStore(dataset_path=dataset_path, overwrite=False)"
130 |       ],
131 |       "metadata": {
132 |         "colab": {
133 |           "base_uri": "https://localhost:8080/"
134 |         },
135 |         "id": "--NnpT4UOQbv",
136 |         "outputId": "00b40ba0-ae85-412d-c897-8d5ce317b172"
137 |       },
138 |       "execution_count": null,
139 |       "outputs": [
140 |         {
141 |           "output_type": "stream",
142 |           "name": "stderr",
143 |           "text": [
144 |             "/usr/local/lib/python3.10/dist-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.8.12) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
145 |             "  warnings.warn(\n"
146 |           ]
147 |         },
148 |         {
149 |           "output_type": "stream",
150 |           "name": "stdout",
151 |           "text": [
152 |             "Your Deep Lake dataset has been successfully created!\n"
153 |           ]
154 |         },
155 |         {
156 |           "output_type": "stream",
157 |           "name": "stderr",
158 |           "text": []
159 |         }
160 |       ]
161 |     },
162 |     {
163 |       "cell_type": "code",
164 |       "source": [
165 |         "from llama_index.storage.storage_context import StorageContext\n",
166 |         "\n",
167 |         "storage_context = StorageContext.from_defaults(vector_store=vector_store)"
168 |       ],
169 |       "metadata": {
170 |         "id": "loC5XGShPtf_"
171 |       },
172 |       "execution_count": null,
173 |       "outputs": []
174 |     },
175 |     {
176 |       "cell_type": "code",
177 |       "source": [
178 |         "from llama_index import VectorStoreIndex\n",
179 |         "\n",
180 |         "tesla_index = VectorStoreIndex.from_documents(tesla_docs, storage_context=storage_context)"
181 |       ],
182 |       "metadata": {
183 |         "colab": {
184 |           "base_uri": "https://localhost:8080/"
185 |         },
186 |         "id": "7pPld5m3Ptco",
187 |         "outputId": "8b9f21fd-0780-4d61-8a6e-97d524d68232"
188 |       },
189 |       "execution_count": null,
190 |       "outputs": [
191 |         {
192 |           "output_type": "stream",
193 |           "name": "stdout",
194 |           "text": [
195 |             "Uploading data to deeplake dataset.\n"
196 |           ]
197 |         },
198 |         {
199 |           "output_type": "stream",
200 |           "name": "stderr",
201 |           "text": [
202 |             "100%|██████████| 5/5 [00:00<00:00,  7.17it/s]\n",
203 |             "/"
204 |           ]
205 |         },
206 |         {
207 |           "output_type": "stream",
208 |           "name": "stdout",
209 |           "text": [
210 |             "Dataset(path='hub://genai360/LlamaIndex_tesla_predictions', tensors=['text', 'metadata', 'embedding', 'id'])\n",
211 |             "\n",
212 |             "  tensor      htype      shape     dtype  compression\n",
213 |             "  -------    -------    -------   -------  ------- \n",
214 |             "   text       text      (5, 1)      str     None   \n",
215 |             " metadata     json      (5, 1)      str     None   \n",
216 |             " embedding  embedding  (5, 1536)  float32   None   \n",
217 |             "    id        text      (5, 1)      str     None   \n"
218 |           ]
219 |         },
220 |         {
221 |           "output_type": "stream",
222 |           "name": "stderr",
223 |           "text": [
224 |             "\r \r"
225 |           ]
226 |         }
227 |       ]
228 |     },
229 |     {
230 |       "cell_type": "markdown",
231 |       "source": [
232 |         "## From Local Index"
233 |       ],
234 |       "metadata": {
235 |         "id": "GIQLCDvuQPPU"
236 |       }
237 |     },
238 |     {
239 |       "cell_type": "code",
240 |       "source": [
241 |         "webtext_docs = SimpleDirectoryReader( input_files=[\"/content/data/1k/web.txt\"] ).load_data()"
242 |       ],
243 |       "metadata": {
244 |         "id": "7lPWnGE_PtaR"
245 |       },
246 |       "execution_count": null,
247 |       "outputs": []
248 |     },
249 |     {
250 |       "cell_type": "code",
251 |       "source": [
252 |         "try:\n",
253 |         "  # Try to load the index if it is already calculated\n",
254 |         "  storage_context = StorageContext.from_defaults( persist_dir=\"/content/storage/webtext\" )\n",
255 |         "  webtext_index = load_index_from_storage(storage_context)\n",
256 |         "  print(\"Loaded the pre-computed index.\")\n",
257 |         "except:\n",
258 |         "  # Otherwise, generate the indexes\n",
259 |         "  webtext_index = VectorStoreIndex.from_documents(webtext_docs)\n",
260 |         "  webtext_index.storage_context.persist(persist_dir=\"/content/storage/webtext\")\n",
261 |         "  print(\"Generated the index.\")"
262 |       ],
263 |       "metadata": {
264 |         "colab": {
265 |           "base_uri": "https://localhost:8080/"
266 |         },
267 |         "id": "P1ieq9i8QUa1",
268 |         "outputId": "f9da5d43-fbea-494c-9751-21134dc4156c"
269 |       },
270 |       "execution_count": null,
271 |       "outputs": [
272 |         {
273 |           "output_type": "stream",
274 |           "name": "stdout",
275 |           "text": [
276 |             "Generated the index.\n"
277 |           ]
278 |         }
279 |       ]
280 |     },
281 |     {
282 |       "cell_type": "markdown",
283 |       "source": [
284 |         "# Create Query Enginges"
285 |       ],
286 |       "metadata": {
287 |         "id": "dF3KH4bHRU9L"
288 |       }
289 |     },
290 |     {
291 |       "cell_type": "code",
292 |       "source": [
293 |         "tesla_engine = tesla_index.as_query_engine(similarity_top_k=3)\n",
294 |         "webtext_engine = webtext_index.as_query_engine(similarity_top_k=3)"
295 |       ],
296 |       "metadata": {
297 |         "id": "MR4BXAVqQnV8"
298 |       },
299 |       "execution_count": null,
300 |       "outputs": []
301 |     },
302 |     {
303 |       "cell_type": "markdown",
304 |       "source": [
305 |         "# Create the Tools"
306 |       ],
307 |       "metadata": {
308 |         "id": "YFbYTky1Rgu0"
309 |       }
310 |     },
311 |     {
312 |       "cell_type": "code",
313 |       "source": [
314 |         "from llama_index.tools import QueryEngineTool, ToolMetadata\n",
315 |         "\n",
316 |         "query_engine_tools = [\n",
317 |         "    QueryEngineTool(\n",
318 |         "        query_engine=tesla_engine,\n",
319 |         "        metadata=ToolMetadata(\n",
320 |         "            name=\"tesla_1k\",\n",
321 |         "            description=(\n",
322 |         "                \"Provides information about Tesla's statements that refers to future times and predictions. \"\n",
323 |         "                \"Use a detailed plain text question as input to the tool.\"\n",
324 |         "            ),\n",
325 |         "        ),\n",
326 |         "    ),\n",
327 |         "    QueryEngineTool(\n",
328 |         "        query_engine=webtext_engine,\n",
329 |         "        metadata=ToolMetadata(\n",
330 |         "            name=\"webtext_1k\",\n",
331 |         "            description=(\n",
332 |         "                \"Provides information about tesla's life and biographical data. \"\n",
333 |         "                \"Use a detailed plain text question as input to the tool.\"\n",
334 |         "            ),\n",
335 |         "        ),\n",
336 |         "    ),\n",
337 |         "]"
338 |       ],
339 |       "metadata": {
340 |         "id": "vMptZI0uPtVU"
341 |       },
342 |       "execution_count": null,
343 |       "outputs": []
344 |     },
345 |     {
346 |       "cell_type": "markdown",
347 |       "source": [
348 |         "# Define the Agent"
349 |       ],
350 |       "metadata": {
351 |         "id": "0CU3MaJ4Rt4R"
352 |       }
353 |     },
354 |     {
355 |       "cell_type": "code",
356 |       "source": [
357 |         "from llama_index.agent import OpenAIAgent\n",
358 |         "agent = OpenAIAgent.from_tools(query_engine_tools, verbose=True)"
359 |       ],
360 |       "metadata": {
361 |         "id": "RdBtZi50PtSe"
362 |       },
363 |       "execution_count": null,
364 |       "outputs": []
365 |     },
366 |     {
367 |       "cell_type": "code",
368 |       "source": [
369 |         "agent.chat_repl()"
370 |       ],
371 |       "metadata": {
372 |         "colab": {
373 |           "base_uri": "https://localhost:8080/"
374 |         },
375 |         "id": "M-0kHgKCRiq0",
376 |         "outputId": "8ab2c744-e818-46ec-be40-da17fb1d11f0"
377 |       },
378 |       "execution_count": null,
379 |       "outputs": [
380 |         {
381 |           "name": "stdout",
382 |           "output_type": "stream",
383 |           "text": [
384 |             "===== Entering Chat REPL =====\n",
385 |             "Type \"exit\" to exit.\n",
386 |             "\n",
387 |             "Human: What influenced Nikola Tesla to become an inventor?\n",
388 |             "STARTING TURN 1\n",
389 |             "---------------\n",
390 |             "\n",
391 |             "=== Calling Function ===\n",
392 |             "Calling function: webtext_1k with args: {\n",
393 |             "\"input\": \"What influenced Nikola Tesla to become an inventor?\"\n",
394 |             "}\n",
395 |             "Got output: Nikola Tesla was influenced to become an inventor by his studies of mechanical vibrations. He observed the selective response of objects to vibrations and realized the potential for producing effects of tremendous magnitude on physical objects. This led him to pursue research in the field of high-frequency and high-potential currents, which eventually resulted in his groundbreaking inventions.\n",
396 |             "========================\n",
397 |             "\n",
398 |             "STARTING TURN 2\n",
399 |             "---------------\n",
400 |             "\n",
401 |             "Assistant: Nikola Tesla was influenced to become an inventor by his studies of mechanical vibrations. He observed the selective response of objects to vibrations and realized the potential for producing effects of tremendous magnitude on physical objects. This led him to pursue research in the field of high-frequency and high-potential currents, which eventually resulted in his groundbreaking inventions.\n",
402 |             "\n",
403 |             "Human: exit\n"
404 |           ]
405 |         }
406 |       ]
407 |     },
408 |     {
409 |       "cell_type": "markdown",
410 |       "source": [
411 |         "# Agents with Tools"
412 |       ],
413 |       "metadata": {
414 |         "id": "AGakE3sO1F_5"
415 |       }
416 |     },
417 |     {
418 |       "cell_type": "code",
419 |       "source": [
420 |         "from llama_index.tools import FunctionTool\n",
421 |         "\n",
422 |         "def multiply(a: int, b: int) -> int:\n",
423 |         "    \"\"\"Multiply two integers and returns the result integer\"\"\"\n",
424 |         "    return a * b\n",
425 |         "\n",
426 |         "\n",
427 |         "def add(a: int, b: int) -> int:\n",
428 |         "    \"\"\"Add two integers and returns the result integer\"\"\"\n",
429 |         "    return a + b\n",
430 |         "\n",
431 |         "\n",
432 |         "multiply_tool = FunctionTool.from_defaults(fn=multiply, name=\"multiply\")\n",
433 |         "add_tool = FunctionTool.from_defaults(fn=add, name=\"add\")\n",
434 |         "\n",
435 |         "all_tools = [multiply_tool, add_tool]"
436 |       ],
437 |       "metadata": {
438 |         "id": "xwTwYSSjRihM"
439 |       },
440 |       "execution_count": null,
441 |       "outputs": []
442 |     },
443 |     {
444 |       "cell_type": "code",
445 |       "source": [
446 |         "from llama_index import VectorStoreIndex\n",
447 |         "from llama_index.objects import ObjectIndex, SimpleToolNodeMapping\n",
448 |         "\n",
449 |         "tool_mapping = SimpleToolNodeMapping.from_objects(all_tools)\n",
450 |         "obj_index = ObjectIndex.from_objects(\n",
451 |         "    all_tools,\n",
452 |         "    tool_mapping,\n",
453 |         "    VectorStoreIndex,\n",
454 |         ")"
455 |       ],
456 |       "metadata": {
457 |         "id": "gRNaPGDyRieS"
458 |       },
459 |       "execution_count": null,
460 |       "outputs": []
461 |     },
462 |     {
463 |       "cell_type": "code",
464 |       "source": [
465 |         "from llama_index.agent import FnRetrieverOpenAIAgent\n",
466 |         "\n",
467 |         "agent = FnRetrieverOpenAIAgent.from_retriever(\n",
468 |         "    obj_index.as_retriever(), verbose=True\n",
469 |         ")"
470 |       ],
471 |       "metadata": {
472 |         "id": "GNHxglUDKJ5L"
473 |       },
474 |       "execution_count": null,
475 |       "outputs": []
476 |     },
477 |     {
478 |       "cell_type": "code",
479 |       "source": [
480 |         "agent.chat(\"What's 12 multiplied by 22? Make sure to use Tools\")"
481 |       ],
482 |       "metadata": {
483 |         "colab": {
484 |           "base_uri": "https://localhost:8080/"
485 |         },
486 |         "id": "uSCd47OB2ZIN",
487 |         "outputId": "665317f3-6710-4a72-f940-96fd50800ef0"
488 |       },
489 |       "execution_count": null,
490 |       "outputs": [
491 |         {
492 |           "output_type": "stream",
493 |           "name": "stdout",
494 |           "text": [
495 |             "STARTING TURN 1\n",
496 |             "---------------\n",
497 |             "\n",
498 |             "=== Calling Function ===\n",
499 |             "Calling function: multiply with args: {\n",
500 |             "  \"a\": 12,\n",
501 |             "  \"b\": 22\n",
502 |             "}\n",
503 |             "Got output: 264\n",
504 |             "========================\n",
505 |             "\n",
506 |             "STARTING TURN 2\n",
507 |             "---------------\n",
508 |             "\n"
509 |           ]
510 |         },
511 |         {
512 |           "output_type": "execute_result",
513 |           "data": {
514 |             "text/plain": [
515 |               "AgentChatResponse(response='12 multiplied by 22 is 264.', sources=[ToolOutput(content='264', tool_name='multiply', raw_input={'args': (), 'kwargs': {'a': 12, 'b': 22}}, raw_output=264)], source_nodes=[])"
516 |             ]
517 |           },
518 |           "metadata": {},
519 |           "execution_count": 32
520 |         }
521 |       ]
522 |     },
523 |     {
524 |       "cell_type": "code",
525 |       "source": [
526 |         "agent.chat( \"What is 5 + 2?\", tool_choice=\"add\" )"
527 |       ],
528 |       "metadata": {
529 |         "colab": {
530 |           "base_uri": "https://localhost:8080/"
531 |         },
532 |         "id": "btL4lvUy2ZBK",
533 |         "outputId": "6b36a503-321e-489f-8101-e20890367ddb"
534 |       },
535 |       "execution_count": null,
536 |       "outputs": [
537 |         {
538 |           "output_type": "stream",
539 |           "name": "stdout",
540 |           "text": [
541 |             "STARTING TURN 1\n",
542 |             "---------------\n",
543 |             "\n",
544 |             "=== Calling Function ===\n",
545 |             "Calling function: add with args: {\n",
546 |             "  \"a\": 5,\n",
547 |             "  \"b\": 2\n",
548 |             "}\n",
549 |             "Got output: 7\n",
550 |             "========================\n",
551 |             "\n",
552 |             "STARTING TURN 2\n",
553 |             "---------------\n",
554 |             "\n"
555 |           ]
556 |         },
557 |         {
558 |           "output_type": "execute_result",
559 |           "data": {
560 |             "text/plain": [
561 |               "AgentChatResponse(response='5 + 2 is equal to 7.', sources=[ToolOutput(content='7', tool_name='add', raw_input={'args': (), 'kwargs': {'a': 5, 'b': 2}}, raw_output=7)], source_nodes=[])"
562 |             ]
563 |           },
564 |           "metadata": {},
565 |           "execution_count": 35
566 |         }
567 |       ]
568 |     },
569 |     {
570 |       "cell_type": "code",
571 |       "source": [],
572 |       "metadata": {
573 |         "id": "GHfXq66zEZ7B"
574 |       },
575 |       "execution_count": null,
576 |       "outputs": []
577 |     }
578 |   ]
579 | }


--------------------------------------------------------------------------------
/notebooks/Chapter 10 - Create_Dataset_For_Cohere_Fine_Tuning.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyOPHu7S6DtYH8pxdgVFtm+3",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2010%20-%20Create_Dataset_For_Cohere_Fine_Tuning.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "source": [
 32 |         "import json"
 33 |       ],
 34 |       "metadata": {
 35 |         "id": "ttUWh0ojOHnw"
 36 |       },
 37 |       "execution_count": null,
 38 |       "outputs": []
 39 |     },
 40 |     {
 41 |       "cell_type": "code",
 42 |       "source": [
 43 |         "chemicals = True\n",
 44 |         "diseases = True\n",
 45 |         "\n",
 46 |         "if chemicals and diseases: output = \"both\"\n",
 47 |         "elif chemicals: output = \"chemical\"\n",
 48 |         "elif diseases: output = \"disease\""
 49 |       ],
 50 |       "metadata": {
 51 |         "id": "Pzl9rdzEAR8r"
 52 |       },
 53 |       "execution_count": null,
 54 |       "outputs": []
 55 |     },
 56 |     {
 57 |       "cell_type": "code",
 58 |       "source": [
 59 |         "!wget https://raw.githubusercontent.com/towardsai/rag-ebook-files/main/bc5cdr.json"
 60 |       ],
 61 |       "metadata": {
 62 |         "id": "ioswvNcj0cDq"
 63 |       },
 64 |       "execution_count": null,
 65 |       "outputs": []
 66 |     },
 67 |     {
 68 |       "cell_type": "code",
 69 |       "source": [
 70 |         "with open('bc5cdr.json') as json_file:\n",
 71 |         "    data = json.load(json_file)"
 72 |       ],
 73 |       "metadata": {
 74 |         "id": "ugqgWcnsykAX"
 75 |       },
 76 |       "execution_count": null,
 77 |       "outputs": []
 78 |     },
 79 |     {
 80 |       "cell_type": "code",
 81 |       "source": [
 82 |         "train = 0\n",
 83 |         "test = 0\n",
 84 |         "develop = 0\n",
 85 |         "for item in data:\n",
 86 |         "  if item['dataset_type'] == \"train\": train+=1\n",
 87 |         "  if item['dataset_type'] == \"test\": test+=1\n",
 88 |         "  if item['dataset_type'] == \"develop\": develop+=1\n",
 89 |         "\n",
 90 |         "print(train, test, develop)"
 91 |       ],
 92 |       "metadata": {
 93 |         "colab": {
 94 |           "base_uri": "https://localhost:8080/"
 95 |         },
 96 |         "id": "aSByWlK1ywwo",
 97 |         "outputId": "6ce140f8-d41a-40a8-9fef-e4715985a690"
 98 |       },
 99 |       "execution_count": null,
100 |       "outputs": [
101 |         {
102 |           "output_type": "stream",
103 |           "name": "stdout",
104 |           "text": [
105 |             "500 500 500\n"
106 |           ]
107 |         }
108 |       ]
109 |     },
110 |     {
111 |       "cell_type": "markdown",
112 |       "source": [
113 |         "# Get Diseases/Chemicals"
114 |       ],
115 |       "metadata": {
116 |         "id": "bg1zmJIg567d"
117 |       }
118 |     },
119 |     {
120 |       "cell_type": "code",
121 |       "source": [
122 |         "instruction = \"The following article contains technical terms including diseases, drugs and chemicals. Create a list only of the {} mentioned.\\n\\n\"\n",
123 |         "outstruction = \"\\n\\nList of extracted {}:\\n\""
124 |       ],
125 |       "metadata": {
126 |         "id": "CtTh6-yF6m4x"
127 |       },
128 |       "execution_count": null,
129 |       "outputs": []
130 |     },
131 |     {
132 |       "cell_type": "code",
133 |       "source": [
134 |         "the_list = []\n",
135 |         "for item in data:\n",
136 |         "  chems = []\n",
137 |         "  dis = []\n",
138 |         "\n",
139 |         "  if item['dataset_type'] == \"test\": continue;\n",
140 |         "\n",
141 |         "  for ent in item['passages'][1]['entities']:\n",
142 |         "    if ent['type'] == \"Chemical\":\n",
143 |         "      if ent['text'][0] not in chems:\n",
144 |         "        chems.append( ent['text'][0] )\n",
145 |         "\n",
146 |         "    if ent['type'] == \"Disease\":\n",
147 |         "      if ent['text'][0] not in dis:\n",
148 |         "        dis.append( ent['text'][0] )\n",
149 |         "\n",
150 |         "  if chemicals: the_list.append({'prompt': instruction.format(\"chemicals\") + item['passages'][1]['text'] + outstruction.format(\"chemicals\"), 'completion': \"- \"+ \"\\n- \".join(chems)})\n",
151 |         "  if diseases: the_list.append({'prompt': instruction.format(\"diseases\") + item['passages'][1]['text'] + outstruction.format(\"diseases\"), 'completion': \"- \"+ \"\\n- \".join(dis)})"
152 |       ],
153 |       "metadata": {
154 |         "id": "7c5vtBfm2ePM"
155 |       },
156 |       "execution_count": null,
157 |       "outputs": []
158 |     },
159 |     {
160 |       "cell_type": "markdown",
161 |       "source": [
162 |         "# Get Relationships"
163 |       ],
164 |       "metadata": {
165 |         "id": "sqqlDfEI56QC"
166 |       }
167 |     },
168 |     {
169 |       "cell_type": "code",
170 |       "source": [
171 |         "instruction = \"The following article contains technical terms including diseases, drugs and chemicals. Create a list only of the influences between the chemicals and diseases mentioned.\\n\\n\"\n",
172 |         "outstruction = \"\\n\\nList of extracted influences:\\n\""
173 |       ],
174 |       "metadata": {
175 |         "id": "13YPqlxTK7QI"
176 |       },
177 |       "execution_count": null,
178 |       "outputs": []
179 |     },
180 |     {
181 |       "cell_type": "code",
182 |       "source": [
183 |         "the_list_rel = []\n",
184 |         "for item in data:\n",
185 |         "  if item['dataset_type'] == \"test\": continue;\n",
186 |         "\n",
187 |         "  the_relations = []\n",
188 |         "  for rel in item['passages'][1]['relations']:\n",
189 |         "    chem_found = False\n",
190 |         "    dis_found = False\n",
191 |         "    chem = None\n",
192 |         "    dis = None\n",
193 |         "    for ent in item['passages'][0]['entities'] + item['passages'][1]['entities']:\n",
194 |         "      if len( ent['normalized'] ):\n",
195 |         "        if rel['arg1_id'] == ent['normalized'][0]['db_id'] and not chem_found: # Chemical\n",
196 |         "          chem = ent['text'][0]\n",
197 |         "          chem_found = True\n",
198 |         "\n",
199 |         "        if rel['arg2_id'] == ent['normalized'][0]['db_id'] and not dis_found: # Disease\n",
200 |         "          dis_found = True\n",
201 |         "          dis = ent['text'][0]\n",
202 |         "\n",
203 |         "    the_relations.append( f\"- Chemical {chem} influences disease {dis}\" )\n",
204 |         "\n",
205 |         "  the_list_rel.append( {\"prompt\": instruction + item['passages'][1]['text'] + outstruction, \"completion\": \"\\n\".join(the_relations)} )"
206 |       ],
207 |       "metadata": {
208 |         "id": "cViLt2aJ5_YP"
209 |       },
210 |       "execution_count": null,
211 |       "outputs": []
212 |     },
213 |     {
214 |       "cell_type": "code",
215 |       "source": [
216 |         "the_list = the_list + the_list_rel"
217 |       ],
218 |       "metadata": {
219 |         "id": "fwQL1Nj8Rg7E"
220 |       },
221 |       "execution_count": null,
222 |       "outputs": []
223 |     },
224 |     {
225 |       "cell_type": "code",
226 |       "source": [
227 |         "len( the_list )"
228 |       ],
229 |       "metadata": {
230 |         "colab": {
231 |           "base_uri": "https://localhost:8080/"
232 |         },
233 |         "id": "Pq25an-5RyW2",
234 |         "outputId": "91067c32-83a6-4305-d223-5ffad70ec44f"
235 |       },
236 |       "execution_count": null,
237 |       "outputs": [
238 |         {
239 |           "output_type": "execute_result",
240 |           "data": {
241 |             "text/plain": [
242 |               "3000"
243 |             ]
244 |           },
245 |           "metadata": {},
246 |           "execution_count": 103
247 |         }
248 |       ]
249 |     },
250 |     {
251 |       "cell_type": "code",
252 |       "source": [
253 |         "# Writing to sample.json\n",
254 |         "with open(f\"{output}_rel_instruct_all.jsonl\", \"w\") as outfile:\n",
255 |         "  for item in the_list:\n",
256 |         "    outfile.write(json.dumps(item) + \"\\n\")"
257 |       ],
258 |       "metadata": {
259 |         "id": "VxpnDPVm2eFc"
260 |       },
261 |       "execution_count": null,
262 |       "outputs": []
263 |     }
264 |   ]
265 | }


--------------------------------------------------------------------------------
/notebooks/Chapter 10 - Fine_Tuning_using_Cohere_for_Medical_Data.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "provenance": [],
  7 |       "authorship_tag": "ABX9TyN6utP1sq5+xhggpPsvNz13",
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     },
 14 |     "language_info": {
 15 |       "name": "python"
 16 |     }
 17 |   },
 18 |   "cells": [
 19 |     {
 20 |       "cell_type": "markdown",
 21 |       "metadata": {
 22 |         "id": "view-in-github",
 23 |         "colab_type": "text"
 24 |       },
 25 |       "source": [
 26 |         "<a href=\"https://colab.research.google.com/github/towardsai/ragbook-notebooks/blob/main/notebooks/Chapter%2010%20-%20Fine_Tuning_using_Cohere_for_Medical_Data.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 27 |       ]
 28 |     },
 29 |     {
 30 |       "cell_type": "code",
 31 |       "execution_count": null,
 32 |       "metadata": {
 33 |         "colab": {
 34 |           "base_uri": "https://localhost:8080/"
 35 |         },
 36 |         "id": "pARS0h0AFaNS",
 37 |         "outputId": "474f9625-3ddc-4b90-fc3e-c27701fdbecd"
 38 |       },
 39 |       "outputs": [
 40 |         {
 41 |           "output_type": "stream",
 42 |           "name": "stdout",
 43 |           "text": [
 44 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.6/47.6 kB\u001b[0m \u001b[31m835.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 45 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.7/2.7 MB\u001b[0m \u001b[31m29.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 46 |             "\u001b[?25h"
 47 |           ]
 48 |         }
 49 |       ],
 50 |       "source": [
 51 |         "!pip -q install cohere"
 52 |       ]
 53 |     },
 54 |     {
 55 |       "cell_type": "code",
 56 |       "source": [
 57 |         "import cohere\n",
 58 |         "co = cohere.Client(\"<API_KEY>\")"
 59 |       ],
 60 |       "metadata": {
 61 |         "id": "Aq0K8Pg7Fdrm"
 62 |       },
 63 |       "execution_count": null,
 64 |       "outputs": []
 65 |     },
 66 |     {
 67 |       "cell_type": "markdown",
 68 |       "source": [
 69 |         "# 1. Diseases"
 70 |       ],
 71 |       "metadata": {
 72 |         "id": "KqokYfIqORlT"
 73 |       }
 74 |     },
 75 |     {
 76 |       "cell_type": "code",
 77 |       "source": [
 78 |         "prompt = \"\"\"The following article contains technical terms including diseases, drugs and chemicals. Create a list only of the diseases mentioned.\n",
 79 |         "\n",
 80 |         "Progressive neurodegeneration of the optic nerve and the loss of retinal ganglion cells is a hallmark of glaucoma, the leading cause of irreversible blindness worldwide, with primary open-angle glaucoma (POAG) being the most frequent form of glaucoma in the Western world. While some genetic mutations have been identified for some glaucomas, those associated with POAG are limited and for most POAG patients, the etiology is still unclear. Unfortunately, treatment of this neurodegenerative disease and other retinal degenerative diseases is lacking. For POAG, most of the treatments focus on reducing aqueous humor formation, enhancing uveoscleral or conventional outflow, or lowering intraocular pressure through surgical means. These efforts, in some cases, do not always lead to a prevention of vision loss and therefore other strategies are needed to reduce or reverse the progressive neurodegeneration. In this review, we will highlight some of the ocular pharmacological approaches that are being tested to reduce neurodegeneration and provide some form of neuroprotection.\n",
 81 |         "\n",
 82 |         "List of extracted diseases:\"\"\""
 83 |       ],
 84 |       "metadata": {
 85 |         "id": "R0zkL7RPFsB9"
 86 |       },
 87 |       "execution_count": null,
 88 |       "outputs": []
 89 |     },
 90 |     {
 91 |       "cell_type": "markdown",
 92 |       "source": [
 93 |         "#### Base (Cohere)"
 94 |       ],
 95 |       "metadata": {
 96 |         "id": "76u6s9KiKmAy"
 97 |       }
 98 |     },
 99 |     {
100 |       "cell_type": "code",
101 |       "source": [
102 |         "response = co.generate(\n",
103 |         "    model='command-nightly',\n",
104 |         "    prompt = prompt,\n",
105 |         "    max_tokens=200,\n",
106 |         "    temperature=0.750)\n",
107 |         "\n",
108 |         "base_model = response.generations[0].text\n",
109 |         "\n",
110 |         "print( base_model )"
111 |       ],
112 |       "metadata": {
113 |         "colab": {
114 |           "base_uri": "https://localhost:8080/"
115 |         },
116 |         "id": "Pcdw-STVFtyr",
117 |         "outputId": "0e5391e9-d900-466f-b874-ebf687cac85a"
118 |       },
119 |       "execution_count": null,
120 |       "outputs": [
121 |         {
122 |           "output_type": "stream",
123 |           "name": "stdout",
124 |           "text": [
125 |             " - glaucoma\n",
126 |             "- primary open-angle glaucoma\n"
127 |           ]
128 |         }
129 |       ]
130 |     },
131 |     {
132 |       "cell_type": "markdown",
133 |       "source": [
134 |         "#### Custom"
135 |       ],
136 |       "metadata": {
137 |         "id": "2C_tf7zdKkRU"
138 |       }
139 |     },
140 |     {
141 |       "cell_type": "code",
142 |       "source": [
143 |         "response = co.generate(\n",
144 |         "    model='1715f1cb-3294-46a2-8631-5182031e6a5d-ft'\n",
145 |         "    prompt = prompt,\n",
146 |         "    max_tokens=200,\n",
147 |         "    temperature=0.750)\n",
148 |         "\n",
149 |         "diesease_model = response.generations[0].text\n",
150 |         "\n",
151 |         "print( diesease_model )"
152 |       ],
153 |       "metadata": {
154 |         "colab": {
155 |           "base_uri": "https://localhost:8080/"
156 |         },
157 |         "id": "spQ_RRH4Kf4n",
158 |         "outputId": "734acc5c-4ee3-4348-c754-b12ae35c07cc"
159 |       },
160 |       "execution_count": null,
161 |       "outputs": [
162 |         {
163 |           "output_type": "stream",
164 |           "name": "stdout",
165 |           "text": [
166 |             " - neurodegeneration of the optic nerve\n",
167 |             "- loss of retinal ganglion cells\n",
168 |             "- glaucoma\n",
169 |             "- blindness\n",
170 |             "- open-angle glaucoma\n",
171 |             "- POAG\n",
172 |             "- retinal degenerative diseases\n",
173 |             "- aqueous humor\n",
174 |             "- intraocular pressure\n"
175 |           ]
176 |         }
177 |       ]
178 |     },
179 |     {
180 |       "cell_type": "markdown",
181 |       "source": [
182 |         "# 2. Chemicals"
183 |       ],
184 |       "metadata": {
185 |         "id": "enlhjCUwOVMy"
186 |       }
187 |     },
188 |     {
189 |       "cell_type": "code",
190 |       "source": [
191 |         "prompt = \"\"\"The following article contains technical terms including diseases, drugs and chemicals. Create a list only of the chemicals mentioned.\n",
192 |         "\n",
193 |         "To test the validity of the hypothesis that hypomethylation of DNA plays an important role in the initiation of carcinogenic process, 5-azacytidine (5-AzC) (10 mg/kg), an inhibitor of DNA methylation, was given to rats during the phase of repair synthesis induced by the three carcinogens, benzo[a]-pyrene (200 mg/kg), N-methyl-N-nitrosourea (60 mg/kg) and 1,2-dimethylhydrazine (1,2-DMH) (100 mg/kg). The initiated hepatocytes in the liver were assayed as the gamma-glutamyltransferase (gamma-GT) positive foci formed following a 2-week selection regimen consisting of dietary 0.02% 2-acetylaminofluorene coupled with a necrogenic dose of CCl4. The results obtained indicate that with all three carcinogens, administration of 5-AzC during repair synthesis increased the incidence of initiated hepatocytes, for example 10-20 foci/cm2 in 5-AzC and carcinogen-treated rats compared with 3-5 foci/cm2 in rats treated with carcinogen only. Administration of [3H]-5-azadeoxycytidine during the repair synthesis induced by 1,2-DMH further showed that 0.019 mol % of cytosine residues in DNA were substituted by the analogue, indicating that incorporation of 5-AzC occurs during repair synthesis. In the absence of the carcinogen, 5-AzC given after a two thirds partial hepatectomy, when its incorporation should be maximum, failed to induce any gamma-GT positive foci. The results suggest that hypomethylation of DNA per se may not be sufficient for initiation. Perhaps two events might be necessary for initiation, the first caused by the carcinogen and a second involving hypomethylation of DNA.\n",
194 |         "\n",
195 |         "List of extracted chemicals:\"\"\""
196 |       ],
197 |       "metadata": {
198 |         "id": "Xf0Ow1usOjRk"
199 |       },
200 |       "execution_count": null,
201 |       "outputs": []
202 |     },
203 |     {
204 |       "cell_type": "markdown",
205 |       "source": [
206 |         "#### Base (Cohere)"
207 |       ],
208 |       "metadata": {
209 |         "id": "8HqecRCAOalV"
210 |       }
211 |     },
212 |     {
213 |       "cell_type": "code",
214 |       "source": [
215 |         "response = co.generate(\n",
216 |         "    model='command-nightly',\n",
217 |         "    prompt = prompt,\n",
218 |         "    max_tokens=200,\n",
219 |         "    temperature=0.750)\n",
220 |         "\n",
221 |         "base_model = response.generations[0].text\n",
222 |         "\n",
223 |         "print( base_model )"
224 |       ],
225 |       "metadata": {
226 |         "colab": {
227 |           "base_uri": "https://localhost:8080/"
228 |         },
229 |         "id": "jGUgvIQXOcBP",
230 |         "outputId": "9cde699a-b2a2-47d0-b4d6-6cc64b35f7a1"
231 |       },
232 |       "execution_count": null,
233 |       "outputs": [
234 |         {
235 |           "output_type": "stream",
236 |           "name": "stdout",
237 |           "text": [
238 |             " - 5-azacytidine (5-AzC)\n",
239 |             "- benzo[a]-pyrene\n",
240 |             "- N-methyl-N-nitrosourea\n",
241 |             "- 1,2-dimethylhydrazine\n",
242 |             "- CCl4\n",
243 |             "- 2-acetylaminofluorene\n"
244 |           ]
245 |         }
246 |       ]
247 |     },
248 |     {
249 |       "cell_type": "markdown",
250 |       "source": [
251 |         "#### Custom"
252 |       ],
253 |       "metadata": {
254 |         "id": "SQBzoUkGOaZY"
255 |       }
256 |     },
257 |     {
258 |       "cell_type": "code",
259 |       "source": [
260 |         "response = co.generate(\n",
261 |         "    model='1715f1cb-3294-46a2-8631-5182031e6a5d-ft',\n",
262 |         "    prompt = prompt,\n",
263 |         "    max_tokens=200,\n",
264 |         "    temperature=0.750)\n",
265 |         "\n",
266 |         "diesease_model = response.generations[0].text\n",
267 |         "\n",
268 |         "print( diesease_model )"
269 |       ],
270 |       "metadata": {
271 |         "colab": {
272 |           "base_uri": "https://localhost:8080/"
273 |         },
274 |         "id": "RxPYTMk3ObiP",
275 |         "outputId": "3a3ce5ab-922c-4a42-b895-1c455741769d"
276 |       },
277 |       "execution_count": null,
278 |       "outputs": [
279 |         {
280 |           "output_type": "stream",
281 |           "name": "stdout",
282 |           "text": [
283 |             " - 5-azacytidine\n",
284 |             "- 5-AzC\n",
285 |             "- benzo[a]-pyrene\n",
286 |             "- N-methyl-N-nitrosourea\n",
287 |             "- 1,2-dimethylhydrazine\n",
288 |             "- 1,2-DMH\n",
289 |             "- 2-acetylaminofluorene\n",
290 |             "- CCl4\n",
291 |             "- [3H]-5-azadeoxycytidine\n",
292 |             "- cytosine\n"
293 |           ]
294 |         }
295 |       ]
296 |     },
297 |     {
298 |       "cell_type": "markdown",
299 |       "source": [
300 |         "# 3. Relationships"
301 |       ],
302 |       "metadata": {
303 |         "id": "fRDuQN19C-Gf"
304 |       }
305 |     },
306 |     {
307 |       "cell_type": "code",
308 |       "source": [
309 |         "prompt = \"\"\"The following article contains technical terms including diseases, drugs and chemicals. Create a list only of the influences between the chemicals and diseases mentioned.\n",
310 |         "\n",
311 |         "The yield of severe cirrhosis of the liver (defined as a shrunken finely nodular liver with micronodular histology, ascites greater than 30 ml, plasma albumin less than 2.2 g/dl, splenomegaly 2-3 times normal, and testicular atrophy approximately half normal weight) after 12 doses of carbon tetrachloride given intragastrically in the phenobarbitone-primed rat was increased from 25% to 56% by giving the initial \"calibrating\" dose of carbon tetrachloride at the peak of the phenobarbitone-induced enlargement of the liver. At this point it was assumed that the cytochrome P450/CCl4 toxic state was both maximal and stable. The optimal rat size to begin phenobarbitone was determined as 100 g, and this size as a group had a mean maximum relative liver weight increase 47% greater than normal rats of the same body weight. The optimal time for the initial dose of carbon tetrachloride was after 14 days on phenobarbitone.\n",
312 |         "\n",
313 |         "nList of extracted influences:\"\"\""
314 |       ],
315 |       "metadata": {
316 |         "id": "Wb8v-WqbDELT"
317 |       },
318 |       "execution_count": null,
319 |       "outputs": []
320 |     },
321 |     {
322 |       "cell_type": "markdown",
323 |       "source": [
324 |         "#### Base (Cohere)"
325 |       ],
326 |       "metadata": {
327 |         "id": "00NsBCjMDSxr"
328 |       }
329 |     },
330 |     {
331 |       "cell_type": "code",
332 |       "source": [
333 |         "response = co.generate(\n",
334 |         "    model='command-nightly',\n",
335 |         "    prompt = prompt,\n",
336 |         "    max_tokens=200,\n",
337 |         "    temperature=0.750)\n",
338 |         "\n",
339 |         "base_model = response.generations[0].text\n",
340 |         "\n",
341 |         "print( base_model )"
342 |       ],
343 |       "metadata": {
344 |         "colab": {
345 |           "base_uri": "https://localhost:8080/"
346 |         },
347 |         "id": "MAUGHhvyDQ-t",
348 |         "outputId": "83aab5dc-3d34-4369-a8a3-6e63de21abb7"
349 |       },
350 |       "execution_count": null,
351 |       "outputs": [
352 |         {
353 |           "output_type": "stream",
354 |           "name": "stdout",
355 |           "text": [
356 |             " severe cirrhosis of the liver influences shrinking, finely nodular, ascites, plasma albumin, splenomegaly, testicular atrophy, carbon tetrachloride, phenobarbitone\n"
357 |           ]
358 |         }
359 |       ]
360 |     },
361 |     {
362 |       "cell_type": "markdown",
363 |       "source": [
364 |         "#### Custom"
365 |       ],
366 |       "metadata": {
367 |         "id": "ciFL2-IpDUki"
368 |       }
369 |     },
370 |     {
371 |       "cell_type": "code",
372 |       "source": [
373 |         "response = co.generate(\n",
374 |         "    model='1715f1cb-3294-46a2-8631-5182031e6a5d-ft',\n",
375 |         "    prompt = prompt,\n",
376 |         "    max_tokens=200,\n",
377 |         "    temperature=0.750)\n",
378 |         "\n",
379 |         "diesease_model = response.generations[0].text\n",
380 |         "\n",
381 |         "print( diesease_model )"
382 |       ],
383 |       "metadata": {
384 |         "colab": {
385 |           "base_uri": "https://localhost:8080/"
386 |         },
387 |         "id": "AOcDc0maDSaP",
388 |         "outputId": "5391b5fd-ab34-427b-87a9-d3c207090df7"
389 |       },
390 |       "execution_count": null,
391 |       "outputs": [
392 |         {
393 |           "output_type": "stream",
394 |           "name": "stdout",
395 |           "text": [
396 |             " - Chemical phenobarbitone influences disease cirrhosis of the liver\n",
397 |             "- Chemical carbon tetrachloride influences disease cirrhosis of the liver\n"
398 |           ]
399 |         }
400 |       ]
401 |     },
402 |     {
403 |       "cell_type": "code",
404 |       "source": [],
405 |       "metadata": {
406 |         "id": "qG0YpKk-DSP1"
407 |       },
408 |       "execution_count": null,
409 |       "outputs": []
410 |     }
411 |   ]
412 | }


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | langchain==0.0.208 
2 | openai==0.27.8 
3 | python-dotenv
4 | ipykernel


--------------------------------------------------------------------------------