├── .DS_Store ├── .env.example ├── .gitignore ├── .ipynb_checkpoints └── Untitled-checkpoint.ipynb ├── .vscode └── settings.json ├── Makefile ├── README.md ├── notebooks ├── .DS_Store ├── 1.0-Intro-ChatGPT-API-prompt-basics.ipynb ├── 1.1-chatgpt-api-structured-outputs-pydantic.ipynb ├── 1.2-intro-openai-function-calling.ipynb ├── 1.3-extraction-use-case.ipynb ├── 1.4-gpt4o-vision-capabilities.ipynb ├── 2.0-prompt-eng-techniques.ipynb ├── 3.0-fine-tuning-chatgpt-api.ipynb ├── 4.0-intro-to-langchain.ipynb ├── 4.1-qa-with-langchain.ipynb ├── 4.2-quiz-pdf-demo2.ipynb ├── 4.2-quiz-pdf1.ipynb ├── 4.3-langchain-deploy-recipe-walkthrough.ipynb ├── 4.4-langchain-adept-demo.ipynb ├── 5.0-quiz_generator_app.ipynb ├── 7.1-pydantic-pdf-extraction.ipynb ├── 7.2-langchain-pydantic-basics.ipynb ├── 8.0-pydantic-query-planner-and-app-developer-examples.ipynb ├── 9.0-intro-langsmith.ipynb ├── Drunk OWL_cover_image.png ├── Funky Penguin_cover_image (1).png ├── Funky Penguin_cover_image.png ├── Punk Elmo_cover_image.png ├── Reinventing-explanation.md ├── Reinventing-explanation.txt ├── Users │ └── greatmaster │ │ └── Desktop │ │ └── projects │ │ └── oreilly-live-trainings │ │ └── oreilly_live_training_llm_apps │ │ └── notebooks │ │ └── example-notes-files │ │ ├── Invoice │ │ └── note2.txt │ │ ├── Invoices │ │ └── note2.txt │ │ ├── Personal Notes and Reminders │ │ └── note1.txt │ │ ├── Personal Notes │ │ └── Reminders │ │ │ └── note1.txt │ │ └── Research Paper │ │ └── paper-pancakes.pdf ├── __pycache__ │ └── quiz_templates.cpython-311.pyc ├── assets-resources │ ├── .DS_Store │ ├── agent_loop.svg │ ├── attention-architecture.png │ ├── attention-paper.pdf │ ├── backend_llm_app.png │ ├── bear.png │ ├── cat.png │ ├── chatgpt-demo.png │ ├── components_input_output.png │ ├── embeddings-similarity.png │ ├── embeddings-similarity2.png │ ├── embeddings.png │ ├── fine-tune-response-planning.png │ ├── fine-tuning-slides │ │ ├── fine-tuning-slides.001.jpeg │ │ ├── fine-tuning-slides.002.jpeg │ │ ├── fine-tuning-slides.003.jpeg │ │ ├── fine-tuning-slides.004.jpeg │ │ ├── fine-tuning-slides.005.jpeg │ │ ├── fine-tuning-slides.006.jpeg │ │ ├── fine-tuning-slides.007.jpeg │ │ ├── fine-tuning-slides.008.jpeg │ │ ├── fine-tuning-slides.009.jpeg │ │ ├── fine-tuning-slides.010.jpeg │ │ ├── fine-tuning-slides.011.jpeg │ │ ├── fine-tuning-slides.012.jpeg │ │ ├── fine-tuning-slides.013.jpeg │ │ ├── fine-tuning-slides.014.jpeg │ │ ├── fine-tuning-slides.015.jpeg │ │ ├── fine-tuning-slides.016.jpeg │ │ ├── fine-tuning-slides.017.jpeg │ │ ├── fine-tuning-slides.018.jpeg │ │ ├── fine-tuning-slides.019.jpeg │ │ ├── fine-tuning-slides.020.jpeg │ │ ├── fine-tuning-slides.021.jpeg │ │ ├── fine-tuning-slides.022.jpeg │ │ ├── fine-tuning-slides.023.jpeg │ │ ├── fine-tuning-slides.024.jpeg │ │ ├── fine-tuning-slides.025.jpeg │ │ └── fine-tuning-slides.026.jpeg │ ├── fine_tune_model.png │ ├── frontend_backend.png │ ├── frontend_llm_app.png │ ├── google-icon.png │ ├── google-translate.png │ ├── langchain-icon.svg │ ├── lcel-image.png │ ├── llm_paper_know_dont_know.pdf │ ├── llm_paper_know_dont_know.pkl │ ├── llm_predicts_pancakes.png │ ├── loading.gif │ ├── neural_net.png │ ├── openai-function-calling-diagram.png │ ├── owl-customer-support.png │ ├── owl.png │ ├── pancakes-delicious.jpg │ ├── panda-letters-app-demo.png │ ├── panda_letter.png │ ├── prompt-basics │ │ ├── prompt-basics.001.jpeg │ │ ├── prompt-basics.002.jpeg │ │ ├── prompt-basics.003.jpeg │ │ ├── prompt-basics.004.jpeg │ │ ├── prompt-basics.005.jpeg │ │ ├── prompt-basics.006.jpeg │ 
│ ├── prompt-basics.007.jpeg │ │ └── prompt-basics.008.jpeg │ ├── prompt-constrains-space.png │ ├── prompt-strategy-2.png │ ├── prompt-strategy3.png │ ├── prompt-strategy4.png │ ├── prompt-strategy5.png │ ├── prompt-strategy6.png │ ├── rag-docs.png │ ├── rag-langchain-retrieval.png │ ├── rag-langchain.png │ ├── sheep.png │ ├── transformer-process.png │ ├── transformers-architecture.png │ ├── unigram.jpg │ ├── vectordb.png │ └── what-is-langchain.png ├── chunk-size-report.md ├── customer-tickets │ ├── ticket1.txt │ ├── ticket2.txt │ ├── ticket3.txt │ ├── ticket4.txt │ └── ticket5.txt ├── dataset.jsonl ├── demo-apps.py ├── docs │ ├── james.txt │ ├── maria.txt │ ├── robert.txt │ ├── sofia.txt │ └── tom.txt ├── example-notes-files │ ├── note1.txt │ ├── note2.txt │ └── paper-pancakes.pdf ├── extra-notebooks │ ├── 2.0-applying-prompt-engineering-strategies.ipynb │ ├── 2.1-prompt-engineering-guide.ipynb │ ├── 2.4-prompt-engineering-techniques-knowledge-generation.ipynb │ ├── 7.0-prompts-as-composable-primitives.ipynb │ └── quiz-app-new.py ├── extracted_ticket_issues.csv ├── images │ ├── 2023-07-17-12-48-28.png │ ├── 2023-07-17-12-55-18.png │ ├── 2023-07-17-12-55-35.png │ ├── 2023-07-17-12-59-36.png │ ├── 2023-07-17-13-00-49.png │ ├── 2023-07-21-15-55-05.png │ ├── 2023-07-24-10-52-10.png │ ├── 2023-07-28-18-38-27.png │ ├── 2023-07-30-19-29-27.png │ ├── 2023-07-30-19-32-13.png │ ├── 2023-07-30-19-34-48.png │ ├── 2023-08-17-14-48-39.png │ ├── ChatGPT-statistical-model.jpg │ ├── ChatGPT-statistical-model.png │ ├── Notebook_4-dynamic_prompt.png │ ├── Notebook_4-prompt_chaining.png │ ├── ToT_framework.png │ ├── backend_llm_app1.png │ ├── backend_llm_app2.png │ ├── bear.png │ ├── bigram-transition-probs.png │ ├── cat.png │ ├── embeddings.png │ ├── embeddings2.png │ ├── frontend_backend.png │ ├── frontend_llm_app.png │ ├── owl.png │ ├── prob_distribution_over_next_words.png │ ├── prompt_ai.png │ ├── sheep.png │ └── transition_probs.png ├── intro-to-langchain-live-demo.ipynb ├── langchain-deploy.py ├── notes.txt ├── notes_summarizer_app.py ├── pages │ ├── demo-app-panda-letters.py │ ├── demo-app-qa.py │ ├── demo-app-translation.py │ ├── demo-langchain-basics.ipynb │ ├── demo-quiz-app.py │ └── demo-summarization-app.py ├── prompt_constrain_space_visualization.py ├── prompt_engineering_results.csv ├── prompt_engineering_results2.csv ├── qa_pdf.py ├── quiz_app.py ├── quiz_templates.py ├── semantic-search-demo.ipynb ├── summary.txt └── superheroes.csv ├── presentation-slides ├── .DS_Store ├── chatgpt-api-presentation.pdf └── presentation.html └── requirements ├── requirements.in └── requirements.txt /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/.DS_Store -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="YOUR API KEY" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | *.key 3 | .streamlit/ 4 | **/.streamlit/ 5 | *.toml -------------------------------------------------------------------------------- /.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 
4 | "nbformat": 4, 5 | "nbformat_minor": 5 6 | } 7 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "makefile.configureOnOpen": false, 3 | "liveServer.settings.port": 5501 4 | } -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ENV_NAME ?= oreilly-chatgpt-apps 2 | PYTHON_VERSION ?= 3.11 3 | 4 | # Install exact Python and CUDA versions 5 | conda-create: 6 | conda create -n $(ENV_NAME) python=$(PYTHON_VERSION) 7 | 8 | repo-setup: 9 | mkdir requirements 10 | touch requirements/requirements.in 11 | pip install uv 12 | uv pip install pip-tools 13 | 14 | notebook-setup: 15 | python -m ipykernel install --user --name=$(ENV_NAME) 16 | 17 | env-update: 18 | uv pip compile ./requirements/requirements.in -o ./requirements/requirements.txt 19 | uv pip sync ./requirements/requirements.txt 20 | 21 | pip-tools-setup: 22 | pip install uv 23 | uv pip install pip-tools setuptools -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # O'Reilly Live Trainining - Building Text Based Applications with the ChatGPT API and Langchain 2 | 3 | ## Setup 4 | 5 | **Conda** 6 | 7 | - Install [anaconda](https://www.anaconda.com/download) 8 | - This repo was tested on a Mac with python=3.11. 9 | - Create an environment: `conda create -n oreilly-chatgpt-apps python=3.11` 10 | - Activate your environment with: `conda activate oreilly-chatgpt-apps` 11 | - Install requirements with: `pip install -r requirements/requirements.txt` 12 | - Setup your openai [API key](https://platform.openai.com/) 13 | 14 | **Pip** 15 | 16 | 17 | 1. **Create a Virtual Environment:** 18 | Navigate to your project directory. Make sure you have python 3.11 installed! 19 | If using Python 3's built-in `venv`: 20 | ```bash 21 | python -m venv oreilly-chatgpt-apps 22 | ``` 23 | If you're using `virtualenv`: 24 | ```bash 25 | virtualenv oreilly-chatgpt-apps 26 | ``` 27 | 28 | 2. **Activate the Virtual Environment:** 29 | - **On Windows:** 30 | ```bash 31 | .\oreilly-chatgpt-apps\Scripts\activate 32 | ``` 33 | - **On macOS and Linux:** 34 | ```bash 35 | source oreilly-chatgpt-apps/bin/activate 36 | ``` 37 | 38 | 3. **Install Dependencies from `requirements.txt`:** 39 | ```bash 40 | pip install python-dotenv 41 | pip install -r requirements.txt 42 | ``` 43 | 44 | 4. Setup your openai [API key](https://platform.openai.com/) 45 | 46 | Remember to deactivate the virtual environment once you're done by simply typing: 47 | ```bash 48 | deactivate 49 | ``` 50 | 51 | ## Setup your .env file 52 | 53 | - Change the `.env.example` file to `.env` and add your OpenAI API key. 54 | 55 | ## To use this Environment with Jupyter Notebooks: 56 | 57 | - ```pip install jupyter``` 58 | - ```python3 -m ipykernel install --user --name=oreilly-chatgpt-apps``` 59 | 60 | 61 | ## Notebooks 62 | 63 | Here are the notebooks available in the `notebooks/` folder: 64 | 65 | 1. 
[Intro to ChatGPT API & Prompt Basics](notebooks/1.0-Intro-ChatGPT-API-prompt-basics.ipynb) 66 | 67 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/1.0-Intro-ChatGPT-API-prompt-basics.ipynb) 68 | 69 | 2. [Intro to OpenAI Function Calling](notebooks/1.2-intro-openai-function-calling.ipynb) 70 | 71 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/1.2-intro-openai-function-calling.ipynb) 72 | 73 | 3. [Extraction Use Case](notebooks/1.3-extraction-use-case.ipynb) 74 | 75 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/1.3-extraction-use-case.ipynb) 76 | 77 | 4. [Applying Prompt Engineering Strategies](notebooks/2.0-applying-prompt-engineering-strategies.ipynb) 78 | 79 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/2.0-applying-prompt-engineering-strategies.ipynb) 80 | 81 | 5. [Fine-tuning ChatGPT API](notebooks/3.0-fine-tuning-chatgpt-api.ipynb) 82 | 83 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/3.0-fine-tuning-chatgpt-api.ipynb) 84 | 85 | 6. [Intro to LangChain](notebooks/4.0-intro-to-langchain.ipynb) 86 | 87 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/4.0-intro-to-langchain.ipynb) 88 | 89 | 7. [Q&A with LangChain](notebooks/4.1-qa-with-langchain.ipynb) 90 | 91 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/4.1-qa-with-langchain.ipynb) 92 | 93 | 8. [Quiz PDF with LangChain](notebooks/4.2-quiz-pdf.ipynb) 94 | 95 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/4.2-quiz-pdf.ipynb) 96 | 97 | 9. [LangChain Deploy Recipe Walkthrough](notebooks/4.3-langchain-deploy-recipe-walkthrough.ipynb) 98 | 99 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/4.3-langchain-deploy-recipe-walkthrough.ipynb) 100 | 101 | 10. [LangChain Adept Demo](notebooks/4.4-langchain-adept-demo.ipynb) 102 | 103 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/4.4-langchain-adept-demo.ipynb) 104 | 105 | 11. [Quiz Generator App](notebooks/5.0-quiz_generator_app.ipynb) 106 | 107 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/5.0-quiz_generator_app.ipynb) 108 | 109 | 12. 
[General Intro to LLMs](notebooks/general-intro-to-llms.ipynb)
110 | 
111 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/general-intro-to-llms.ipynb)
112 | 
113 | 13. [Prompt Engineering Techniques: Knowledge Generation](notebooks/prompt-engineering-techniques-knowledge-generation.ipynb)
114 | 
115 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/prompt-engineering-techniques-knowledge-generation.ipynb)
116 | 
117 | 14. [Prompt Engineering Techniques](notebooks/prompt-engineering-techniques.ipynb)
118 | 
119 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/EnkrateiaLucca/oreilly_live_training_llm_apps/blob/main/notebooks/prompt-engineering-techniques.ipynb)
--------------------------------------------------------------------------------
/notebooks/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/.DS_Store
--------------------------------------------------------------------------------
/notebooks/1.2-intro-openai-function-calling.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {
7 | "execution": {
8 | "iopub.execute_input": "2025-01-08T03:31:02.579696Z",
9 | "iopub.status.busy": "2025-01-08T03:31:02.579236Z",
10 | "iopub.status.idle": "2025-01-08T03:31:04.815989Z",
11 | "shell.execute_reply": "2025-01-08T03:31:04.815444Z"
12 | }
13 | },
14 | "outputs": [],
15 | "source": [
16 | "%pip install openai\n",
17 | "%pip install python-dotenv"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 3,
23 | "metadata": {
24 | "execution": {
25 | "iopub.execute_input": "2025-01-08T03:31:04.818599Z",
26 | "iopub.status.busy": "2025-01-08T03:31:04.818345Z",
27 | "iopub.status.idle": "2025-01-08T03:31:04.985868Z",
28 | "shell.execute_reply": "2025-01-08T03:31:04.985499Z"
29 | }
30 | },
31 | "outputs": [],
32 | "source": [
33 | "import os\n",
34 | "from dotenv import load_dotenv\n",
35 | "from openai import OpenAI\n",
36 | "\n",
37 | "# loading from a .env file\n",
38 | "# load_dotenv(dotenv_path=\"/full/path/to/your/.env\")\n",
39 | "\n",
40 | "# or \n",
41 | "# if you're on Google Colab, just uncomment below and replace with your OpenAI API key\n",
42 | "# os.environ[\"OPENAI_API_KEY\"] = \"\""
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "# OpenAI Functions"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "Ok, let's first understand how [OpenAI](https://openai.com/), the company behind ChatGPT, allows for these function-call implementations in its API.\n",
57 | "\n",
58 | "OpenAI implemented a [function calling API](https://platform.openai.com/docs/guides/function-calling), which is a standard way to connect their models to outside tools, as in the very simple example we did above.\n",
59 | "\n",
60 | "According to their [official documentation](https://platform.openai.com/docs/guides/function-calling#:~:text=The%20basic%20sequence,to%20the%20user.)
the sequence of steps for function calling is as follows:\n",
61 | "\n",
62 | ""
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "Below is an example taken from their official documentation:"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 4,
75 | "metadata": {
76 | "execution": {
77 | "iopub.execute_input": "2025-01-08T03:31:04.987810Z",
78 | "iopub.status.busy": "2025-01-08T03:31:04.987702Z",
79 | "iopub.status.idle": "2025-01-08T03:31:05.003665Z",
80 | "shell.execute_reply": "2025-01-08T03:31:05.003259Z"
81 | }
82 | },
83 | "outputs": [],
84 | "source": [
85 | "from openai import OpenAI\n",
86 | "import json\n",
87 | "\n",
88 | "client = OpenAI()"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "Let's look at how our previous example, with its three simple functions (`create_directory()`, `create_file()`, and `list_files()`), would be implemented using OpenAI's function calling approach:"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 10,
101 | "metadata": {
102 | "execution": {
103 | "iopub.execute_input": "2025-01-08T03:31:05.005111Z",
104 | "iopub.status.busy": "2025-01-08T03:31:05.005036Z",
105 | "iopub.status.idle": "2025-01-08T03:31:05.007162Z",
106 | "shell.execute_reply": "2025-01-08T03:31:05.006963Z"
107 | }
108 | },
109 | "outputs": [],
110 | "source": [
111 | "import json\n",
112 | "import subprocess\n",
113 | "\n",
114 | "def create_directory(directory_name):\n",
115 | " \"\"\"Function that creates a directory given a directory name.\"\"\"\n",
116 | " subprocess.run([\"mkdir\", directory_name])\n",
117 | " return json.dumps({\"directory_name\": directory_name})\n",
118 | "\n",
119 | "\n",
120 | "tool_create_directory = {\n",
121 | " \"type\": \"function\",\n",
122 | " \"function\": {\n",
123 | " \"name\": \"create_directory\",\n",
124 | " \"description\": \"Create a directory given a directory name.\",\n",
125 | " \"parameters\": {\n",
126 | " \"type\": \"object\",\n",
127 | " \"properties\": {\n",
128 | " \"directory_name\": {\n",
129 | " \"type\": \"string\",\n",
130 | " \"description\": \"The name of the directory to create.\",\n",
131 | " }\n",
132 | " },\n",
133 | " \"required\": [\"directory_name\"],\n",
134 | " },\n",
135 | " },\n",
136 | "}\n",
137 | "\n",
138 | "tools = [tool_create_directory] "
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 11,
144 | "metadata": {
145 | "execution": {
146 | "iopub.execute_input": "2025-01-08T03:31:05.011257Z",
147 | "iopub.status.busy": "2025-01-08T03:31:05.011181Z",
148 | "iopub.status.idle": "2025-01-08T03:31:06.420733Z",
149 | "shell.execute_reply": "2025-01-08T03:31:06.419656Z"
150 | }
151 | },
152 | "outputs": [
153 | {
154 | "data": {
155 | "text/plain": [
156 | "ChatCompletion(id='chatcmpl-AnTy37AS8wl9Fyd5NZel2A41bH0hO', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"The folder called 'sp-asked-a-great-question' has been successfully created.\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1736356631, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_d02d531b47', usage=CompletionUsage(completion_tokens=18, prompt_tokens=62, total_tokens=80, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0),
prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))" 157 | ] 158 | }, 159 | "execution_count": 11, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "import json\n", 166 | "\n", 167 | "def run_terminal_task():\n", 168 | " messages = [{\"role\": \"user\", \"content\": \"Create a folder called 'sp-asked-a-great-question'.\"}]\n", 169 | " tools = [tool_create_directory] \n", 170 | " response = client.chat.completions.create(\n", 171 | " model=\"gpt-4o-mini\",\n", 172 | " messages=messages,\n", 173 | " tools=tools,\n", 174 | " tool_choice=\"auto\", # auto is default, but we'll be explicit\n", 175 | " )\n", 176 | " response_message = response.choices[0].message\n", 177 | " tool_calls = response_message.tool_calls\n", 178 | " # Step 2: check if the model wanted to call a function\n", 179 | " \n", 180 | " if tool_calls:\n", 181 | " # Step 3: call the function\n", 182 | " # Note: the JSON response may not always be valid; be sure to handle errors\n", 183 | " available_functions = {\n", 184 | " \"create_directory\": create_directory,\n", 185 | " }\n", 186 | " messages.append(response_message)\n", 187 | " # Step 4: send the info for each function call and function response to the model\n", 188 | " for tool_call in tool_calls:\n", 189 | " function_name = tool_call.function.name\n", 190 | " function_to_call = available_functions[function_name]\n", 191 | " function_args = json.loads(tool_call.function.arguments)\n", 192 | " function_response = function_to_call(\n", 193 | " directory_name=function_args.get(\"directory_name\"),\n", 194 | " )\n", 195 | " messages.append(\n", 196 | " {\n", 197 | " \"tool_call_id\": tool_call.id,\n", 198 | " \"role\": \"tool\",\n", 199 | " \"name\": function_name,\n", 200 | " \"content\": function_response,\n", 201 | " }\n", 202 | " )\n", 203 | " second_response = client.chat.completions.create(\n", 204 | " model=\"gpt-4o-mini\",\n", 205 | " messages=messages,\n", 206 | " )\n", 207 | " return second_response\n", 208 | "\n", 209 | "output = run_terminal_task()\n", 210 | "output" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 12, 216 | "metadata": { 217 | "execution": { 218 | "iopub.execute_input": "2025-01-08T03:31:06.425975Z", 219 | "iopub.status.busy": "2025-01-08T03:31:06.425513Z", 220 | "iopub.status.idle": "2025-01-08T03:31:06.432322Z", 221 | "shell.execute_reply": "2025-01-08T03:31:06.431485Z" 222 | } 223 | }, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "\"The folder called 'sp-asked-a-great-question' has been successfully created.\"" 229 | ] 230 | }, 231 | "execution_count": 12, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | } 235 | ], 236 | "source": [ 237 | "output.choices[0].message.content" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 13, 243 | "metadata": { 244 | "execution": { 245 | "iopub.execute_input": "2025-01-08T03:31:06.435159Z", 246 | "iopub.status.busy": "2025-01-08T03:31:06.434937Z", 247 | "iopub.status.idle": "2025-01-08T03:31:06.564787Z", 248 | "shell.execute_reply": "2025-01-08T03:31:06.563749Z" 249 | } 250 | }, 251 | "outputs": [ 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": [ 256 | "\u001b[1m\u001b[36msp-asked-a-great-question/\u001b[m\u001b[m\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "!ls -d */ | grep sp-asked" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "Great! 
We implemented OpenAI function calling for creating directories! We could evolve this approach further, but let's stop here for now.\n",
269 | "\n",
270 | "See more info on these examples in OpenAI's [official cookbook](https://cookbook.openai.com/examples/how_to_call_functions_with_chat_models)."
271 | ]
272 | }
273 | ],
274 | "metadata": {
275 | "kernelspec": {
276 | "display_name": "oreilly-chatgpt-apps",
277 | "language": "python",
278 | "name": "oreilly-chatgpt-apps"
279 | },
280 | "language_info": {
281 | "codemirror_mode": {
282 | "name": "ipython",
283 | "version": 3
284 | },
285 | "file_extension": ".py",
286 | "mimetype": "text/x-python",
287 | "name": "python",
288 | "nbconvert_exporter": "python",
289 | "pygments_lexer": "ipython3",
290 | "version": "3.11.10"
291 | }
292 | },
293 | "nbformat": 4,
294 | "nbformat_minor": 2
295 | }
296 | 
--------------------------------------------------------------------------------
/notebooks/1.4-gpt4o-vision-capabilities.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "data": {
10 | "text/plain": [
11 | "'The capital of France is Paris.'"
12 | ]
13 | },
14 | "execution_count": 1,
15 | "metadata": {},
16 | "output_type": "execute_result"
17 | }
18 | ],
19 | "source": [
20 | "from openai import OpenAI\n",
21 | "\n",
22 | "client = OpenAI()\n",
23 | "\n",
24 | "def get_response(prompt_question):\n",
25 | " response = client.chat.completions.create(\n",
26 | " model=\"gpt-4o-mini\",\n",
27 | " messages=[{\"role\": \"system\", \"content\": \"You are a helpful research and programming assistant\"},\n",
28 | " {\"role\": \"user\", \"content\": prompt_question}]\n",
29 | " )\n",
30 | " \n",
31 | " return response.choices[0].message.content\n",
32 | "\n",
33 | "get_response(\"What is the capital of France?\")"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "# What if I want an IMAGE to go IN and TEXT to come OUT?"
41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 2, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "{'id': 'chatcmpl-AnTPhtfa0ZEzw0sua9AYFkrq3vEyu',\n", 59 | " 'object': 'chat.completion',\n", 60 | " 'created': 1736354501,\n", 61 | " 'model': 'gpt-4o-2024-08-06',\n", 62 | " 'choices': [{'index': 0,\n", 63 | " 'message': {'role': 'assistant',\n", 64 | " 'content': '```\\nCaption: Diagram of a Transformer model architecture, illustrating the encoder and decoder components with multi-head attention and feed-forward layers.\\n```',\n", 65 | " 'refusal': None},\n", 66 | " 'logprobs': None,\n", 67 | " 'finish_reason': 'stop'}],\n", 68 | " 'usage': {'prompt_tokens': 893,\n", 69 | " 'completion_tokens': 28,\n", 70 | " 'total_tokens': 921,\n", 71 | " 'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0},\n", 72 | " 'completion_tokens_details': {'reasoning_tokens': 0,\n", 73 | " 'audio_tokens': 0,\n", 74 | " 'accepted_prediction_tokens': 0,\n", 75 | " 'rejected_prediction_tokens': 0}},\n", 76 | " 'system_fingerprint': 'fp_f785eb5f47'}" 77 | ] 78 | }, 79 | "execution_count": 2, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "import os\n", 86 | "import base64\n", 87 | "import requests\n", 88 | "api_key = os.environ.get(\"OPENAI_API_KEY\")\n", 89 | "# Function to encode the image\n", 90 | "def encode_image(image_path):\n", 91 | " with open(image_path, \"rb\") as image_file:\n", 92 | " return base64.b64encode(image_file.read()).decode('utf-8')\n", 93 | "\n", 94 | "# Path to your image\n", 95 | "image_path = \"./assets-resources/attention-architecture.png\"\n", 96 | "\n", 97 | "\n", 98 | "# Getting the base64 string\n", 99 | "base64_image = encode_image(image_path)\n", 100 | "\n", 101 | "headers = {\n", 102 | " \"Content-Type\": \"application/json\",\n", 103 | " \"Authorization\": f\"Bearer {api_key}\"\n", 104 | "}\n", 105 | "\n", 106 | "payload = {\n", 107 | " \"model\": \"gpt-4o\",\n", 108 | " \"messages\": [\n", 109 | " {\n", 110 | " \"role\": \"user\",\n", 111 | " \"content\": [\n", 112 | " {\n", 113 | " \"type\": \"text\",\n", 114 | " \"text\": f\"\"\"\n", 115 | " Write me a caption for this image.\n", 116 | " Output format:\n", 117 | " '''\n", 118 | " Caption: \n", 119 | " '''\n", 120 | " \"\"\"\n", 121 | " },\n", 122 | " {\n", 123 | " \"type\": \"image_url\",\n", 124 | " \"image_url\": {\n", 125 | " \"url\": f\"data:image/jpeg;base64,{base64_image}\"\n", 126 | " }\n", 127 | " }\n", 128 | " ]\n", 129 | " }\n", 130 | " ],\n", 131 | " \"max_tokens\": 2500\n", 132 | "}\n", 133 | "response = requests.post(\"https://api.openai.com/v1/chat/completions\", headers=headers, json=payload)\n", 134 | "output = response.json()\n", 135 | "output" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 3, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "'```\\nCaption: Diagram of a Transformer model architecture, illustrating the encoder and decoder components with multi-head attention and feed-forward layers.\\n```'" 147 | ] 148 | }, 149 | "execution_count": 3, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "captions_output = output['choices'][0]['message']['content']\n", 156 | "captions_output" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 4, 162 
| "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/markdown": [ 167 | "```\n", 168 | "Caption: Diagram of a Transformer model architecture, illustrating the encoder and decoder components with multi-head attention and feed-forward layers.\n", 169 | "```" 170 | ], 171 | "text/plain": [ 172 | "" 173 | ] 174 | }, 175 | "execution_count": 4, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "from IPython.display import Markdown\n", 182 | "Markdown(captions_output)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [] 191 | } 192 | ], 193 | "metadata": { 194 | "kernelspec": { 195 | "display_name": "oreilly-chatgpt-apps", 196 | "language": "python", 197 | "name": "oreilly-chatgpt-apps" 198 | }, 199 | "language_info": { 200 | "codemirror_mode": { 201 | "name": "ipython", 202 | "version": 3 203 | }, 204 | "file_extension": ".py", 205 | "mimetype": "text/x-python", 206 | "name": "python", 207 | "nbconvert_exporter": "python", 208 | "pygments_lexer": "ipython3", 209 | "version": "3.11.10" 210 | } 211 | }, 212 | "nbformat": 4, 213 | "nbformat_minor": 2 214 | } 215 | -------------------------------------------------------------------------------- /notebooks/7.1-pydantic-pdf-extraction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 7, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "!pip install openai\n", 10 | "!pip install langchain\n", 11 | "!pip install langchain_community\n", 12 | "!pip install langchain_openai\n", 13 | "!pip install tiktoken\n", 14 | "!pip install pydantic\n", 15 | "!pip install python-dotenv" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import os\n", 25 | "from dotenv import load_dotenv\n", 26 | "from openai import OpenAI\n", 27 | "\n", 28 | "# loading from a .env file\n", 29 | "# load_dotenv(dotenv_path=\"/full/path/to/your/.env\")\n", 30 | "\n", 31 | "# or \n", 32 | "# if you're on google colab just uncomment below and replace with your openai api key\n", 33 | "# os.environ[\"OPENAI_API_KEY\"] = \"\"" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "from pydantic import BaseModel, Field\n", 43 | "import openai\n", 44 | "import os\n", 45 | "from langchain.utils.openai_functions import convert_pydantic_to_openai_function\n", 46 | "from langchain_openai.chat_models import ChatOpenAI\n", 47 | "\n", 48 | "class InvoiceData(BaseModel):\n", 49 | " \"\"\"Call this function to extract invoice data with specific output structure\"\"\"\n", 50 | " invoice_number: str = Field(..., description=\"The invoice number extracted from the PDF\")\n", 51 | " vat_registration_number: str = Field(..., description=\"The VAT registration number extracted from the PDF\")\n", 52 | "\n", 53 | "fake_invoice_data = \"\"\"\n", 54 | "Invoice Summary\n", 55 | "==============\n", 56 | "Invoice No: INV-123456\n", 57 | "Date: 2024-02-11\n", 58 | "Billing To: ACME Corporation\n", 59 | "Address: 123 Business Rd, Business City, BC1234\n", 60 | "\n", 61 | "Product Description Quantity Unit Price Total\n", 62 | "-----------------------------------------------------------\n", 63 | "Widget A 10 $15.00 $150.00\n", 64 | "Gadget B 5 $20.00 $100.00\n", 
65 | "\n", 66 | "Subtotal: $250.00\n", 67 | "Tax (10%): $25.00\n", 68 | "Total: $275.00\n", 69 | "\n", 70 | "VAT Registration Number: VAT-7890123\n", 71 | "\n", 72 | "Thank you for your business!\n", 73 | "\n", 74 | "\"\"\"\n", 75 | "\n", 76 | "\n", 77 | "invoice_extractor = convert_pydantic_to_openai_function(InvoiceData)\n", 78 | "\n", 79 | "\n", 80 | "llm_chat = ChatOpenAI()\n", 81 | "\n", 82 | "output_invoice = llm_chat.invoke(f\"Invoice: {fake_invoice_data}. Extracted data:\\n\",functions=[invoice_extractor])" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "{'arguments': '{\"invoice_number\":\"INV-123456\",\"vat_registration_number\":\"VAT-7890123\"}',\n", 94 | " 'name': 'InvoiceData'}" 95 | ] 96 | }, 97 | "execution_count": 6, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "output_invoice.additional_kwargs[\"function_call\"]" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 1, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 7, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 11, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "AIMessage(content='', additional_kwargs={'function_call': {'name': 'InvoiceData', 'arguments': '{\\n \"invoice_number\": \"INV-123456\",\\n \"vat_registration_number\": \"VAT-7890123\"\\n}'}})" 136 | ] 137 | }, 138 | "execution_count": 11, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [] 158 | } 159 | ], 160 | "metadata": { 161 | "kernelspec": { 162 | "display_name": "oreilly-env", 163 | "language": "python", 164 | "name": "oreilly-env" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 3 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython3", 176 | "version": "3.11.8" 177 | } 178 | }, 179 | "nbformat": 4, 180 | "nbformat_minor": 2 181 | } 182 | -------------------------------------------------------------------------------- /notebooks/9.0-intro-langsmith.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import getpass\n", 11 | "\n", 12 | "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", 13 | "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n", 14 | "os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n", 15 | "os.environ[\"LANGCHAIN_PROJECT\"] = \"langchain-chatgpt-course-test\"" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [ 
23 | { 24 | "data": { 25 | "text/plain": [ 26 | "AIMessage(content=\"Building a Large Language Model (LLM) app involves several steps, from understanding your requirements to deploying the app. Here's a structured approach to guide you through the process:\\n\\n### 1. Define the Purpose and Scope\\n- **Identify the Use Case**: Determine what problem your app will solve. Examples include chatbots, content generation, sentiment analysis, etc.\\n- **Target Audience**: Understand who will use the app and tailor the features accordingly.\\n\\n### 2. Choose the Right LLM\\n- **Select a Pre-trained Model**: Options include OpenAI's GPT series, Google's BERT, Meta's LLaMA, etc. Consider the model's capabilities, licensing, and cost.\\n- **Model Size and Complexity**: Balance between model capabilities and computational resources.\\n\\n### 3. Set Up the Development Environment\\n- **Hardware Requirements**: Ensure you have adequate computational resources (CPU/GPU) depending on the model size.\\n- **Software Requirements**: Install necessary libraries and frameworks like TensorFlow, PyTorch, or Hugging Face Transformers.\\n\\n### 4. Fine-tune the Model (if necessary)\\n- **Data Collection**: Gather domain-specific data for fine-tuning.\\n- **Training**: Use machine learning frameworks to fine-tune the model on your dataset, adjusting hyperparameters as needed.\\n\\n### 5. Develop the Application\\n- **Backend**: Create APIs to interact with the LLM. Use frameworks like Flask, FastAPI, or Django.\\n- **Frontend**: Develop the user interface using web technologies (React, Angular, Vue) or mobile frameworks (Flutter, React Native).\\n\\n### 6. Integrate the LLM\\n- **API Integration**: If using a third-party LLM service, integrate their API.\\n- **Model Hosting**: Deploy your model on a server if you are hosting it yourself.\\n\\n### 7. Optimize for Performance\\n- **Latency Reduction**: Use techniques like model quantization or distillation to reduce response time.\\n- **Scalability**: Implement load balancing and consider cloud services like AWS, Google Cloud, or Azure for scaling.\\n\\n### 8. Testing\\n- **Functional Testing**: Ensure the app meets all functional requirements.\\n- **Performance Testing**: Evaluate the app's speed, scalability, and reliability.\\n- **User Testing**: Gather feedback from real users to identify and fix UX issues.\\n\\n### 9. Deployment\\n- **Deployment Platform**: Choose between cloud platforms, on-premises servers, or hybrid solutions.\\n- **Continuous Integration/Continuous Deployment (CI/CD)**: Set up pipelines for automatic testing and deployment.\\n\\n### 10. Monitor and Maintain\\n- **Logging and Monitoring**: Implement tools to track app performance and errors.\\n- **Regular Updates**: Update the model and app to incorporate new features and improvements.\\n\\n### 11. 
Address Ethical and Privacy Concerns\\n- **Data Privacy**: Ensure compliance with data protection regulations (e.g., GDPR).\\n- **Bias Mitigation**: Regularly audit your model for biases and take corrective actions.\\n\\n### Additional Tips\\n- **Community and Support**: Engage with developer communities for support and updates.\\n- **Documentation**: Maintain clear documentation for both users and developers.\\n\\nBy following these steps, you can systematically develop an LLM-based application that meets the needs of your users while leveraging the powerful capabilities of language models.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 686, 'prompt_tokens': 15, 'total_tokens': 701, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_5f20662549', 'finish_reason': 'stop', 'logprobs': None}, id='run-09482dd1-04a8-4320-a936-f5688ac6c4ec-0', usage_metadata={'input_tokens': 15, 'output_tokens': 686, 'total_tokens': 701, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})" 27 | ] 28 | }, 29 | "execution_count": 2, 30 | "metadata": {}, 31 | "output_type": "execute_result" 32 | } 33 | ], 34 | "source": [ 35 | "from langchain_openai import ChatOpenAI\n", 36 | "\n", 37 | "llm = ChatOpenAI(model=\"gpt-4o\")\n", 38 | "\n", 39 | "llm.invoke(\"How to build an LLM app?\")" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "AIMessage(content='The classic answer is: \"To get to the other side.\" It\\'s a well-known joke that plays on the simplicity of the setup, leading to an unexpectedly straightforward punchline.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 36, 'prompt_tokens': 15, 'total_tokens': 51, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_703d4ff298', 'finish_reason': 'stop', 'logprobs': None}, id='run-df95c87b-f181-431d-baa7-2719a53e5777-0', usage_metadata={'input_tokens': 15, 'output_tokens': 36, 'total_tokens': 51, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})" 51 | ] 52 | }, 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "llm.invoke(\"Why did the chicken cross the street?\")" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "oreilly-chatgpt-apps", 66 | "language": "python", 67 | "name": "oreilly-chatgpt-apps" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 3 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython3", 79 | "version": "3.11.10" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 2 84 | } 85 | -------------------------------------------------------------------------------- /notebooks/Drunk OWL_cover_image.png: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/Drunk OWL_cover_image.png
--------------------------------------------------------------------------------
/notebooks/Funky Penguin_cover_image (1).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/Funky Penguin_cover_image (1).png
--------------------------------------------------------------------------------
/notebooks/Funky Penguin_cover_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/Funky Penguin_cover_image.png
--------------------------------------------------------------------------------
/notebooks/Punk Elmo_cover_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/Punk Elmo_cover_image.png
--------------------------------------------------------------------------------
/notebooks/Reinventing-explanation.md:
--------------------------------------------------------------------------------
1 | [source](https://michaelnielsen.org/reinventing_explanation/index.html)
2 | Media for thought. #intelligence #learning #problemsolving
3 | [[A dynamic problem solving medium]]
4 | 
5 | Michael Nielsen, in his [[Reinventing-explanation]], says:
6 | 
7 | > I believe it's worth taking non-traditional media seriously not just as a vehicle for popularization or education, which is how they are often viewed, but as an opportunity for explanations which can be, in important ways, deeper. Nielsen, [[Reinventing-explanation]], conclusion
8 | 
9 | > This agglomeration of ideas has turned maps into a powerful _medium for thought_. Consider the famous map of the London Underground, excerpted here:
10 | 
11 | > we internalize the map: the representations used in the map become the representations we use to think about the Underground. This is the sense in which this map is a medium for thought.
12 | 
13 | > He's created a vocabulary of operations which can be used to understand and manipulate and, most crucially of all, _play_ with difference equations. And with enough exposure to this medium, we'd begin internalizing these operations: we'd begin to think about difference equations in this way.
14 | 
15 | Nielsen sees this approach as the future of thinking about mathematics, for example, where mediums like these could expand how we think about mathematical objects.
16 | 
17 | He now turns his attention to the design of a tool like this.
18 | > In the remainder of this essay I will focus on the design of a particular type of media for thought, namely, ***the design of media to _explain_ scientific ideas.***
19 | 
20 | Using non-traditional media to create deeper explanations of scientific ideas.
21 | 
22 | > In fact, we don't yet have even the basic vocabulary of digital explanation. My instinct is that such a vocabulary will be developed in the decades to come. But that is far too big a goal to attack directly.
Instead, we can make progress by constructing prototypes, and learning from what they have to tell us. That's what we'll do in this essay. Nielsen *"Reinventing Explanation"*
23 | 
24 | It's not about making a concept popular for the "masses"...
25 | > (...)it's about understanding **the potential of non-traditional media for serious explanations**, the sort of explanations scientists use amongst themselves. So while it happens to be true that the explanations we'll discuss are accessible to a broad audience, what matters is that those explanations are, in some important ways, deeper than conventional verbal and symbolic explanations.
26 | 
27 | Simpson's paradox:
28 | 
29 | Some paradox where the overall probability favors one event as the local probabilities all individually favor the other event in a universe of just these two events.
30 | 
31 | I could write about what it means to understand something from the perspective of these medium for thought tools.
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | Here we are talking about optimization.
41 | 
42 | > To reiterate, the loss function lets us quantify the quality of any particular set of weights W. The goal of optimization is to find W that minimizes the loss function. We will now motivate and slowly develop an approach to optimizing the loss function.
43 | 
--------------------------------------------------------------------------------
/notebooks/Reinventing-explanation.txt:
--------------------------------------------------------------------------------
1 | [source](https://michaelnielsen.org/reinventing_explanation/index.html)
2 | Media for thought. #intelligence #learning #problemsolving
3 | [[A dynamic problem solving medium]]
4 | 
5 | Michael Nielsen, in his [[Reinventing-explanation]], says:
6 | 
7 | > I believe it's worth taking non-traditional media seriously not just as a vehicle for popularization or education, which is how they are often viewed, but as an opportunity for explanations which can be, in important ways, deeper. Nielsen, [[Reinventing-explanation]], conclusion
8 | 
9 | > This agglomeration of ideas has turned maps into a powerful _medium for thought_. Consider the famous map of the London Underground, excerpted here:
10 | 
11 | > we internalize the map: the representations used in the map become the representations we use to think about the Underground. This is the sense in which this map is a medium for thought.
12 | 
13 | > He's created a vocabulary of operations which can be used to understand and manipulate and, most crucially of all, _play_ with difference equations. And with enough exposure to this medium, we'd begin internalizing these operations: we'd begin to think about difference equations in this way.
14 | 
15 | Nielsen sees this approach as the future of thinking about mathematics, for example, where mediums like these could expand how we think about mathematical objects.
16 | 
17 | He now turns his attention to the design of a tool like this.
18 | > In the remainder of this essay I will focus on the design of a particular type of media for thought, namely, ***the design of media to _explain_ scientific ideas.***
19 | 
20 | Using non-traditional media to create deeper explanations of scientific ideas.
21 | 
22 | > In fact, we don't yet have even the basic vocabulary of digital explanation. My instinct is that such a vocabulary will be developed in the decades to come. But that is far too big a goal to attack directly.
Instead, we can make progress by constructing prototypes, and learning from what they have to tell us. That's what we'll do in this essay. Nielsen *"Reinventing Explanation"* 23 | 24 | It's not about making a concept popular for the "masses"... 25 | > (...)it's about understanding **the potential of non-traditional media for serious explanations**, the sort of explanations scientists use amongst themselves. So while it happens to be true that the explanations we'll discuss are accessible to a broad audience, what matters is that those explanations are, in some important ways, deeper than conventional verbal and symbolic explanations. 26 | 27 | Simpson's paradox: 28 | 29 | Some paradox where the overall probability favors one event as the local probabilities all individually favor the other event in a universe of just these two events. 30 | 31 | I could write about what it means to understand something from the perspective of these medium for thought tools. 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | Here we are talking about optimization. 41 | 42 | > To reiterate, the loss function lets us quantify the quality of any particular set of weights W. The goal of optimization is to find W that minimizes the loss function. We will now motivate and slowly develop an approach to optimizing the loss function. 43 | -------------------------------------------------------------------------------- /notebooks/Users/greatmaster/Desktop/projects/oreilly-live-trainings/oreilly_live_training_llm_apps/notebooks/example-notes-files/Invoice/note2.txt: -------------------------------------------------------------------------------- 1 | Certainly! Below is an example of a synthetic data invoice for the purchase of a large quantity of pancakes. 2 | 3 | --- 4 | 5 | **INVOICE** 6 | 7 | **Seller:** 8 | Fluffy Pancake Co. 9 | 123 Breakfast Blvd. 10 | Pancake City, PC 12345 11 | Phone: (555) 123-4567 12 | Email: sales@fluffypancakes.com 13 | 14 | **Buyer:** 15 | Giant Breakfast Warehouse 16 | 456 Meal St. 17 | Foodland, FL 67890 18 | Phone: (555) 987-6543 19 | Email: orders@giantbreakfast.com 20 | 21 | --- 22 | 23 | **Invoice Number:** 2023-INV-98765 24 | **Invoice Date:** October 22, 2023 25 | **Due Date:** November 5, 2023 26 | 27 | --- 28 | 29 | **Description of Goods:** 30 | 31 | | Item Description | Quantity | Unit Price ($) | Total ($) | 32 | |--------------------------------------|----------|----------------|-----------------| 33 | | Fluffy Classic Pancakes (Frozen) | 10,000 | 0.50 | 5,000.00 | 34 | | Strawberry Delight Pancakes (Frozen) | 5,000 | 0.70 | 3,500.00 | 35 | | Blueberry Bliss Pancakes (Frozen) | 5,000 | 0.75 | 3,750.00 | 36 | | Chocolate Chip Pancakes (Frozen) | 5,000 | 0.80 | 4,000.00 | 37 | 38 | --- 39 | 40 | **Subtotal:** $16,250.00 41 | **Sales Tax (8%):** $1,300.00 42 | **Shipping & Handling:** $200.00 43 | **Total Amount Due:** **$17,750.00** 44 | 45 | --- 46 | 47 | **Payment Instructions:** 48 | Please make the payment to the following account: 49 | Bank: Pancake National Bank 50 | Account Name: Fluffy Pancake Co. 51 | Account Number: 123456789 52 | Routing Number: 987654321 53 | 54 | --- 55 | 56 | **Notes:** 57 | Thank you for your order! Please allow 5-7 business days for processing and delivery. If you have any questions about this invoice, feel free to contact us. 58 | 59 | --- 60 | 61 | **End of Invoice** 62 | 63 | --- 64 | 65 | This example includes all necessary components of an invoice while creatively illustrating the purchase of a sizable quantity of pancakes. 
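The invoice notes above double as sample inputs for the extraction notebooks (for instance, `7.1-pydantic-pdf-extraction.ipynb`). As a rough sketch of the same extraction done with plain Python, assuming the note is saved at the path below (the path, field names, and patterns are illustrative, not taken from the notebooks):

```python
import re
from pathlib import Path

# Illustrative path; adjust to wherever the sample note lives.
NOTE_PATH = Path("notebooks/example-notes-files/note2.txt")

def extract_invoice_fields(text: str) -> dict:
    """Pull labeled fields out of a plain-text invoice note.

    The patterns assume the '**Label:** value' layout used by the
    sample note above; this is a sketch, not a general invoice parser.
    """
    patterns = {
        "invoice_number": r"\*\*Invoice Number:\*\*\s*(\S+)",
        "invoice_date": r"\*\*Invoice Date:\*\*\s*([^\n]+)",
        "total_amount_due": r"\*\*Total Amount Due:\*\*\s*\*\*(\$[\d,.]+)\*\*",
    }
    fields = {}
    for name, pattern in patterns.items():
        match = re.search(pattern, text)
        fields[name] = match.group(1).strip() if match else None
    return fields

print(extract_invoice_fields(NOTE_PATH.read_text()))
# On the sample note above this should print:
# {'invoice_number': '2023-INV-98765', 'invoice_date': 'October 22, 2023',
#  'total_amount_due': '$17,750.00'}
```

The LLM-based version in notebook 7.1 trades these brittle regexes for a Pydantic schema the model fills in, which generalizes to invoices that don't follow this exact layout.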
66 | -------------------------------------------------------------------------------- /notebooks/Users/greatmaster/Desktop/projects/oreilly-live-trainings/oreilly_live_training_llm_apps/notebooks/example-notes-files/Invoices/note2.txt: -------------------------------------------------------------------------------- 1 | Certainly! Below is an example of a synthetic data invoice for the purchase of a large quantity of pancakes. 2 | 3 | --- 4 | 5 | **INVOICE** 6 | 7 | **Seller:** 8 | Fluffy Pancake Co. 9 | 123 Breakfast Blvd. 10 | Pancake City, PC 12345 11 | Phone: (555) 123-4567 12 | Email: sales@fluffypancakes.com 13 | 14 | **Buyer:** 15 | Giant Breakfast Warehouse 16 | 456 Meal St. 17 | Foodland, FL 67890 18 | Phone: (555) 987-6543 19 | Email: orders@giantbreakfast.com 20 | 21 | --- 22 | 23 | **Invoice Number:** 2023-INV-98765 24 | **Invoice Date:** October 22, 2023 25 | **Due Date:** November 5, 2023 26 | 27 | --- 28 | 29 | **Description of Goods:** 30 | 31 | | Item Description | Quantity | Unit Price ($) | Total ($) | 32 | |--------------------------------------|----------|----------------|-----------------| 33 | | Fluffy Classic Pancakes (Frozen) | 10,000 | 0.50 | 5,000.00 | 34 | | Strawberry Delight Pancakes (Frozen) | 5,000 | 0.70 | 3,500.00 | 35 | | Blueberry Bliss Pancakes (Frozen) | 5,000 | 0.75 | 3,750.00 | 36 | | Chocolate Chip Pancakes (Frozen) | 5,000 | 0.80 | 4,000.00 | 37 | 38 | --- 39 | 40 | **Subtotal:** $16,250.00 41 | **Sales Tax (8%):** $1,300.00 42 | **Shipping & Handling:** $200.00 43 | **Total Amount Due:** **$17,750.00** 44 | 45 | --- 46 | 47 | **Payment Instructions:** 48 | Please make the payment to the following account: 49 | Bank: Pancake National Bank 50 | Account Name: Fluffy Pancake Co. 51 | Account Number: 123456789 52 | Routing Number: 987654321 53 | 54 | --- 55 | 56 | **Notes:** 57 | Thank you for your order! Please allow 5-7 business days for processing and delivery. If you have any questions about this invoice, feel free to contact us. 58 | 59 | --- 60 | 61 | **End of Invoice** 62 | 63 | --- 64 | 65 | This example includes all necessary components of an invoice while creatively illustrating the purchase of a sizable quantity of pancakes. 66 | -------------------------------------------------------------------------------- /notebooks/Users/greatmaster/Desktop/projects/oreilly-live-trainings/oreilly_live_training_llm_apps/notebooks/example-notes-files/Personal Notes and Reminders/note1.txt: -------------------------------------------------------------------------------- 1 | **Personal Note with Reminders** 2 | 3 | --- 4 | 5 | **Date:** [Insert Date] 6 | 7 | **Dear [Your Name],** 8 | 9 | Here are some important reminders and notes for the upcoming days. Stay organized and make time for what matters! 10 | 11 | --- 12 | 13 | **To-Do List:** 14 | 15 | 1. **Work Tasks:** 16 | - [ ] Submit project report by **Friday at 5 PM** 17 | - [ ] Prepare for team meeting on **Monday** (notes and presentation) 18 | - [ ] Follow up with [Colleague's Name] regarding [specific issue] 19 | 20 | 2. **Personal Errands:** 21 | - [ ] Grocery shopping on **Saturday morning** 22 | - [ ] Pick up dry cleaning on **Saturday afternoon** 23 | - [ ] Schedule dentist appointment for [Date] 24 | 25 | 3. **Family & Friends:** 26 | - [ ] Call Mom on **Sunday** to check in 27 | - [ ] Plan movie night with [Friend's Name] for **next week** 28 | - [ ] Send birthday card to [Friend/Family Member] by [Date] 29 | 30 | 4. 
**Self-Care:** 31 | - [ ] Exercise at least **3 times** this week 32 | - [ ] Set aside **30 minutes** for reading daily 33 | - [ ] Try the new recipe for dinner on **Thursday** 34 | 35 | 5. **Upcoming Events:** 36 | - **[Event 1]**: Date, time, and location 37 | - **[Event 2]**: Date, time, and location 38 | - **[Special Occasion/Reminder]**: Date and what to prepare 39 | 40 | --- 41 | 42 | **Motivational Note:** 43 | “Remember to take a deep breath—progress, not perfection. You're doing great!” 44 | 45 | --- 46 | 47 | **Ideas to Explore:** 48 | - [ ] Take a pottery class 49 | - [ ] Start a new book series 50 | - [ ] Plan a weekend getaway for [Month] 51 | 52 | --- 53 | 54 | **End of Note** 55 | 56 | Stay focused, and don’t forget to celebrate your small victories! 57 | 58 | --- 59 | 60 | Feel free to customize this note as needed! 61 | -------------------------------------------------------------------------------- /notebooks/Users/greatmaster/Desktop/projects/oreilly-live-trainings/oreilly_live_training_llm_apps/notebooks/example-notes-files/Personal Notes/Reminders/note1.txt: -------------------------------------------------------------------------------- 1 | **Personal Note with Reminders** 2 | 3 | --- 4 | 5 | **Date:** [Insert Date] 6 | 7 | **Dear [Your Name],** 8 | 9 | Here are some important reminders and notes for the upcoming days. Stay organized and make time for what matters! 10 | 11 | --- 12 | 13 | **To-Do List:** 14 | 15 | 1. **Work Tasks:** 16 | - [ ] Submit project report by **Friday at 5 PM** 17 | - [ ] Prepare for team meeting on **Monday** (notes and presentation) 18 | - [ ] Follow up with [Colleague's Name] regarding [specific issue] 19 | 20 | 2. **Personal Errands:** 21 | - [ ] Grocery shopping on **Saturday morning** 22 | - [ ] Pick up dry cleaning on **Saturday afternoon** 23 | - [ ] Schedule dentist appointment for [Date] 24 | 25 | 3. **Family & Friends:** 26 | - [ ] Call Mom on **Sunday** to check in 27 | - [ ] Plan movie night with [Friend's Name] for **next week** 28 | - [ ] Send birthday card to [Friend/Family Member] by [Date] 29 | 30 | 4. **Self-Care:** 31 | - [ ] Exercise at least **3 times** this week 32 | - [ ] Set aside **30 minutes** for reading daily 33 | - [ ] Try the new recipe for dinner on **Thursday** 34 | 35 | 5. **Upcoming Events:** 36 | - **[Event 1]**: Date, time, and location 37 | - **[Event 2]**: Date, time, and location 38 | - **[Special Occasion/Reminder]**: Date and what to prepare 39 | 40 | --- 41 | 42 | **Motivational Note:** 43 | “Remember to take a deep breath—progress, not perfection. You're doing great!” 44 | 45 | --- 46 | 47 | **Ideas to Explore:** 48 | - [ ] Take a pottery class 49 | - [ ] Start a new book series 50 | - [ ] Plan a weekend getaway for [Month] 51 | 52 | --- 53 | 54 | **End of Note** 55 | 56 | Stay focused, and don’t forget to celebrate your small victories! 57 | 58 | --- 59 | 60 | Feel free to customize this note as needed! 
61 | -------------------------------------------------------------------------------- /notebooks/Users/greatmaster/Desktop/projects/oreilly-live-trainings/oreilly_live_training_llm_apps/notebooks/example-notes-files/Research Paper/paper-pancakes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/Users/greatmaster/Desktop/projects/oreilly-live-trainings/oreilly_live_training_llm_apps/notebooks/example-notes-files/Research Paper/paper-pancakes.pdf -------------------------------------------------------------------------------- /notebooks/__pycache__/quiz_templates.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/__pycache__/quiz_templates.cpython-311.pyc -------------------------------------------------------------------------------- /notebooks/assets-resources/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/.DS_Store -------------------------------------------------------------------------------- /notebooks/assets-resources/agent_loop.svg: -------------------------------------------------------------------------------- 1 |
[agent_loop.svg: diagram of the agent loop, with nodes User, LLM, Action/Tools, Observation, and Output]
-------------------------------------------------------------------------------- /notebooks/assets-resources/attention-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/attention-architecture.png -------------------------------------------------------------------------------- /notebooks/assets-resources/attention-paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/attention-paper.pdf -------------------------------------------------------------------------------- /notebooks/assets-resources/backend_llm_app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/backend_llm_app.png -------------------------------------------------------------------------------- /notebooks/assets-resources/bear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/bear.png -------------------------------------------------------------------------------- /notebooks/assets-resources/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/cat.png -------------------------------------------------------------------------------- /notebooks/assets-resources/chatgpt-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/chatgpt-demo.png -------------------------------------------------------------------------------- /notebooks/assets-resources/components_input_output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/components_input_output.png -------------------------------------------------------------------------------- /notebooks/assets-resources/embeddings-similarity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/embeddings-similarity.png -------------------------------------------------------------------------------- /notebooks/assets-resources/embeddings-similarity2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/embeddings-similarity2.png -------------------------------------------------------------------------------- /notebooks/assets-resources/embeddings.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/embeddings.png -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tune-response-planning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tune-response-planning.png -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.001.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.001.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.002.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.002.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.003.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.003.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.004.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.004.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.005.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.005.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.006.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.006.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.007.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.007.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.008.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.008.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.009.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.009.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.010.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.010.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.011.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.011.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.012.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.012.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.013.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.013.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.014.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.014.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.015.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.015.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.016.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.016.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.017.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.017.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.018.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.018.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.019.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.019.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.020.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.020.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.021.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.021.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.022.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.022.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.023.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.023.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.024.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.024.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.025.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.025.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.026.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine-tuning-slides/fine-tuning-slides.026.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/fine_tune_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/fine_tune_model.png -------------------------------------------------------------------------------- /notebooks/assets-resources/frontend_backend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/frontend_backend.png -------------------------------------------------------------------------------- /notebooks/assets-resources/frontend_llm_app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/frontend_llm_app.png -------------------------------------------------------------------------------- /notebooks/assets-resources/google-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/google-icon.png -------------------------------------------------------------------------------- /notebooks/assets-resources/google-translate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/google-translate.png -------------------------------------------------------------------------------- /notebooks/assets-resources/lcel-image.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/lcel-image.png -------------------------------------------------------------------------------- /notebooks/assets-resources/llm_paper_know_dont_know.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/llm_paper_know_dont_know.pdf -------------------------------------------------------------------------------- /notebooks/assets-resources/llm_paper_know_dont_know.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/llm_paper_know_dont_know.pkl -------------------------------------------------------------------------------- /notebooks/assets-resources/llm_predicts_pancakes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/llm_predicts_pancakes.png -------------------------------------------------------------------------------- /notebooks/assets-resources/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/loading.gif -------------------------------------------------------------------------------- /notebooks/assets-resources/neural_net.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/neural_net.png -------------------------------------------------------------------------------- /notebooks/assets-resources/openai-function-calling-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/openai-function-calling-diagram.png -------------------------------------------------------------------------------- /notebooks/assets-resources/owl-customer-support.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/owl-customer-support.png -------------------------------------------------------------------------------- /notebooks/assets-resources/owl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/owl.png -------------------------------------------------------------------------------- /notebooks/assets-resources/pancakes-delicious.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/pancakes-delicious.jpg -------------------------------------------------------------------------------- /notebooks/assets-resources/panda-letters-app-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/panda-letters-app-demo.png -------------------------------------------------------------------------------- /notebooks/assets-resources/panda_letter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/panda_letter.png -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-basics/prompt-basics.001.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-basics/prompt-basics.001.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-basics/prompt-basics.002.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-basics/prompt-basics.002.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-basics/prompt-basics.003.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-basics/prompt-basics.003.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-basics/prompt-basics.004.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-basics/prompt-basics.004.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-basics/prompt-basics.005.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-basics/prompt-basics.005.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-basics/prompt-basics.006.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-basics/prompt-basics.006.jpeg -------------------------------------------------------------------------------- 
/notebooks/assets-resources/prompt-basics/prompt-basics.007.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-basics/prompt-basics.007.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-basics/prompt-basics.008.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-basics/prompt-basics.008.jpeg -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-constrains-space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-constrains-space.png -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-strategy-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-strategy-2.png -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-strategy3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-strategy3.png -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-strategy4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-strategy4.png -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-strategy5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-strategy5.png -------------------------------------------------------------------------------- /notebooks/assets-resources/prompt-strategy6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/prompt-strategy6.png -------------------------------------------------------------------------------- /notebooks/assets-resources/rag-docs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/rag-docs.png -------------------------------------------------------------------------------- /notebooks/assets-resources/rag-langchain-retrieval.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/rag-langchain-retrieval.png -------------------------------------------------------------------------------- /notebooks/assets-resources/rag-langchain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/rag-langchain.png -------------------------------------------------------------------------------- /notebooks/assets-resources/sheep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/sheep.png -------------------------------------------------------------------------------- /notebooks/assets-resources/transformer-process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/transformer-process.png -------------------------------------------------------------------------------- /notebooks/assets-resources/transformers-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/transformers-architecture.png -------------------------------------------------------------------------------- /notebooks/assets-resources/unigram.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/unigram.jpg -------------------------------------------------------------------------------- /notebooks/assets-resources/vectordb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/vectordb.png -------------------------------------------------------------------------------- /notebooks/assets-resources/what-is-langchain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/assets-resources/what-is-langchain.png -------------------------------------------------------------------------------- /notebooks/chunk-size-report.md: -------------------------------------------------------------------------------- 1 | Guide to Optimal Chunk Size for RAG Applications 2 | 3 | # Introduction 4 | When developing Retrieval-Augmented Generation (RAG) systems with large language models (LLMs), determining the right chunk size is a crucial balance between retrieval accuracy, computational efficiency, and response quality. This guide provides an overview of chunking principles, strategies, and factors influencing chunk size selection to optimize the performance of RAG systems. 
5 | 
6 | # Chunking in RAG Systems
Chunking involves dividing documents into smaller, manageable pieces ("chunks") for processing and retrieval. Effective chunking improves the relevance of retrieved information, helping LLMs generate coherent, contextually accurate responses. The right chunk size keeps chunks both informative and computationally efficient while preserving semantic integrity.
7 | 
8 | # Strategies for Determining Optimal Chunk Size
9 | 
10 | [Fixed-Size Chunking/Naive Chunking](https://ar5iv.labs.arxiv.org/html/2401.07883): Splitting text by a set word or token count is straightforward but may cut across semantic units, leading to fragmented or less coherent responses.
11 | 
12 | [Semantic Chunking](https://arc.net/l/quote/erghdulb): Using natural language processing tools to divide the text at logical boundaries preserves meaning and helps the LLM produce more coherent outputs.
13 | 
14 | [Adaptive Chunking](https://vectify.ai/blog/LargeDocumentSummarization?utm_source=substack&utm_medium=email): Adjusting chunk size to the document's length and structure balances consistency across chunks, optimizing both retrieval and generation.
15 | 
16 | # Factors Influencing Chunk Size Selection
17 | 
18 | LLM Context Window: LLMs have a fixed context window, such as 4k or 8k tokens. To use the full context effectively without truncation, chunks must be small enough to fit within the model's window alongside the prompt.
19 | 
20 | Embedding Model Constraints: Embedding models also have context length limits. Chunks must fit within these limits to ensure accurate similarity searches during retrieval.
21 | 
22 | Document Structure and Content: The coherence of the text is critical. Chunks should ideally align with natural boundaries, such as sentences or paragraphs, to preserve the meaning and coherence of the original document.
23 | 
24 | Retrieval Precision vs. Recall: Smaller chunks can improve precision by returning highly relevant segments, but may require more retrievals per response, affecting recall. Larger chunks may improve recall but risk introducing less relevant information.
25 | 
26 | Overlap Between Chunks: Introducing overlap between consecutive chunks helps preserve context at the boundaries, which is essential when relevant context crosses chunk borders; see the sketch after this section.
27 | 
28 | Computational Efficiency: Smaller chunks increase the number of retrieval operations and embedding computations, adding computational cost. Balance chunk size to keep the retrieval load manageable while maintaining response quality.
29 | 
30 | Domain-Specific Requirements: Technical documents may benefit from larger chunks (e.g., entire sections) for completeness, while conversational data may require smaller, more granular chunks for precision.
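As a rough illustration of fixed-size chunking with overlap, here is a minimal sketch (an illustrative snippet for this guide, not any particular library's API). It counts whitespace-separated words for simplicity; a production pipeline would instead count model tokens with a tokenizer such as tiktoken.

```python
def chunk_text(text: str, chunk_size: int = 200, overlap: int = 40) -> list[str]:
    """Split `text` into chunks of `chunk_size` words, where consecutive
    chunks share `overlap` words so boundary context is preserved."""
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    words = text.split()  # simplification: whitespace words, not model tokens
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        if start + chunk_size >= len(words):  # last chunk absorbed the tail
            break
    return chunks

# 1,000 words with 200-word chunks and 40-word overlap -> 6 chunks
print(len(chunk_text("word " * 1000)))
```

Shrinking the overlap reduces index size and embedding cost; growing it makes boundary-crossing context more likely to survive retrieval.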
31 | 
32 | # A Rough Intuitive Guide for Finding Optimal Chunk Size
33 | 
34 | 1. [Consider the task's properties](https://arc.net/l/quote/tpizvina): Different tasks benefit from different kinds of retrieval chunks. For example, question-answering tasks may prefer short phrases, while summarization tasks may prefer long documents.
35 | 2. [Consider the encoder's properties](https://arc.net/l/quote/spmjsukq): Different encoder models have varying encoding capabilities on texts of different lengths. For example, models in the sentence-transformers family (Reimers and Gurevych, 2019) behave better on single sentences, while text-embedding-ada-002 (OpenAI, 2022) is good at longer texts.
36 |    1. In general we'll use [LLM-based encoders](https://arc.net/l/quote/fznujufc).
37 | 3. [Consider the query's properties](https://arc.net/l/quote/wymtpkmo): The length of users' queries should be aligned with the chunk size, which implicitly aligns the amount of contextual information in chunks with that in queries, improving the relevance between queries and retrievals. For example, a retrieval database built on short phrases may be useless for queries that are long documents.
38 | 4. Experimentation: Experiment with different chunk sizes to identify what works best for your specific application and dataset. Tools like LlamaIndex can assist in assessing chunking strategies.
39 | 5. Monitor Metrics: Regularly evaluate metrics like retrieval accuracy, response coherence, and computational efficiency, and adjust your chunking strategy as needed to improve performance.
40 | 6. Context Window Limits: Ensure the total tokens (retrieved text plus prompt) fit within the LLM's context window. This allows the LLM to process multiple chunks simultaneously, enhancing contextual richness.
41 | 7. Semantic Completeness: Make chunks self-contained units of meaning (ideally whole paragraphs or logical segments) to help maintain semantic integrity during retrieval and generation.
42 | 8. Information Density: Balance information density and conciseness. Chunks should have enough information to convey a complete idea but remain manageable for the model's context window. A typical range is 100-300 tokens, depending on the data type.
43 | 
44 | 
45 | # Resources
46 | - https://zilliz.com/blog/exploring-rag-chunking-llms-and-evaluations
47 | - [Optimal chunk size for Large Document Summarization](https://vectify.ai/blog/LargeDocumentSummarization?utm_source=substack&utm_medium=email)
48 | - https://www.llamaindex.ai/blog/evaluating-the-ideal-chunk-size-for-a-rag-system-using-llamaindex-6207e5d3fec5
49 | - https://antematter.io/blogs/optimizing-rag-advanced-chunking-techniques-study
50 | - https://www.analyticsvidhya.com/blog/2024/10/chunking-techniques-to-build-exceptional-rag-systems/
51 | - https://www.databricks.com/blog/long-context-rag-performance-llms
52 | - https://www.datacamp.com/tutorial/how-to-improve-rag-performance-5-key-techniques-with-examples
53 | - https://chatgen.ai/blog/the-ultimate-guide-on-chunking-strategies-rag-part-3/
54 | - https://developer.ibm.com/articles/awb-retrieval-augmented-generation-rag-with-llms-from-watsonx
55 | - https://docs.mistral.ai/guides/rag/
56 | - https://www.rungalileo.io/blog/mastering-rag-advanced-chunking-techniques-for-llm-applications
57 | - https://unstructured.io/blog/chunking-for-rag-best-practices
58 | - https://mallahyari.github.io/rag-ebook/04_advanced_rag.html
59 | - https://learn.microsoft.com/en-us/azure/architecture/ai-ml/guide/rag/rag-chunking-phase
60 | - [RAG Survey](https://ar5iv.labs.arxiv.org/html/2407.13193)
61 | - [RAG for financial documents and recursive chunking strategies](https://ar5iv.labs.arxiv.org/html/2404.07221)
62 | - [Mix-of-Granularity: Optimize the Chunking Granularity for Retrieval-Augmented Generation]()
63 | - [[NAIVE RAG Overview]]
-------------------------------------------------------------------------------- /notebooks/customer-tickets/ticket1.txt: --------------------------------------------------------------------------------
1 | Ticket 1:
2 | I am experiencing frequent disconnections with my internet service.
3 | This occurs multiple times a day, and it's becoming increasingly frustrating as I rely on a stable connection for remote work. -------------------------------------------------------------------------------- /notebooks/customer-tickets/ticket2.txt: -------------------------------------------------------------------------------- 1 | Ticket 2: 2 | I received the wrong item in my recent order, and I would like to initiate a return. The item I was sent does not match the one I ordered, and I have attached a photo for reference. -------------------------------------------------------------------------------- /notebooks/customer-tickets/ticket3.txt: -------------------------------------------------------------------------------- 1 | Ticket 3: 2 | I am unable to access my account after attempting to reset my password. Each time I follow the reset instructions, I receive an error message that says my email is not recognized in your system. -------------------------------------------------------------------------------- /notebooks/customer-tickets/ticket4.txt: -------------------------------------------------------------------------------- 1 | Ticket 4: 2 | My subscription to your service has expired, but I was never notified about it. I want to reactivate my account and ensure that I do not lose any of my saved data during the process. -------------------------------------------------------------------------------- /notebooks/customer-tickets/ticket5.txt: -------------------------------------------------------------------------------- 1 | Ticket 5: 2 | The app continuously crashes whenever I try to open it on my smartphone. I've tried uninstalling and reinstalling, but the problem persists, making it impossible for me to use the service. -------------------------------------------------------------------------------- /notebooks/dataset.jsonl: -------------------------------------------------------------------------------- 1 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- I plan to finish writing the first draft of my novel by the end of next week. I will dedicate at least two hours every day to writing and revising to ensure that I stay on track and meet my goal."}, {"role": "assistant", "content": "- Set a specific daily writing schedule, allocating at least two hours each day for writing and revising.\n- Break down the novel into manageable sections or chapters to work on each day.\n- Create a checklist of tasks for each writing session, such as outlining, drafting, revising, and editing.\n- Set specific milestones for completing each section of the novel to track progress.\n- Eliminate distractions during writing time to maximize productivity.\n- Seek feedback from trusted individuals to ensure the quality and coherence of the novel.\n- Allocate time for rest and relaxation to avoid burnout and maintain creativity."}]} 2 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- My goal for the upcoming weeks is to complete the online course I enrolled in and earn a certificate in digital marketing. 
I will allocate time each day to watch the lectures, complete the assignments, and participate in the discussion forums to maximize my learning."}, {"role": "assistant", "content": "- Allocate specific time each day to watch the lectures\n- Complete the assignments on time\n- Actively participate in the discussion forums\n- Review and revise the course material regularly\n- Seek additional resources or help if needed\n- Stay organized and focused on the course material\n- Prepare for any assessments or exams\n- Stay updated with the latest trends and developments in digital marketing"}]} 3 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- I aim to improve my physical fitness by incorporating a new workout routine into my schedule. I will focus on strength training three times a week and incorporate yoga and stretching exercises to improve flexibility and reduce stress."}, {"role": "assistant", "content": "- Research and choose a suitable strength training routine\n- Schedule three strength training sessions per week\n- Research and choose a suitable yoga routine\n- Schedule yoga sessions to be incorporated into weekly routine\n- Research and choose stretching exercises to improve flexibility\n- Schedule stretching exercises to be incorporated into weekly routine\n- Set specific goals for physical fitness improvement\n- Monitor progress and adjust routine as needed to achieve goals"}]} 4 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- I have set a goal to declutter and organize my entire home over the next few weeks. I will tackle one room at a time, sorting through items, donating or discarding what I no longer need, and creating a more functional and peaceful living space."}, {"role": "assistant", "content": "- Assess the entire home and create a plan to tackle one room at a time\n- Gather necessary organizing supplies such as bins, baskets, and labels\n- Sort through items in each room, deciding what to keep, donate, or discard\n- Clean and organize the items that are being kept\n- Donate or discard items that are no longer needed\n- Create a system for maintaining organization in each room\n- Enjoy the functional and peaceful living space created by the decluttering and organizing efforts"}]} 5 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- My task for the upcoming weeks is to research and plan a budget-friendly vacation for my family. 
I will compare travel options, accommodations, and activities to ensure that we have a memorable and affordable trip."}, {"role": "assistant", "content": "- Research travel options, including flights, train, and car rental\n- Compare accommodations such as hotels, vacation rentals, and hostels\n- Research and plan activities and attractions for the vacation destination\n- Create a budget spreadsheet to track expenses and ensure affordability\n- Consider cost-saving measures such as booking in advance or using travel rewards\n- Consult with family members to gather input and preferences for the vacation\n- Make reservations for transportation, accommodations, and activities\n- Continuously review and adjust the plan to ensure it remains budget-friendly"}]} 6 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- I am committed to learning a new language, and my goal for the upcoming weeks is to practice speaking and listening for at least 30 minutes each day. I will use language learning apps, watch foreign films, and engage in conversations with native speakers to improve my skills."}, {"role": "assistant", "content": "- Use language learning apps to practice speaking and listening for at least 30 minutes each day.\n- Watch foreign films to improve listening skills and exposure to the language.\n- Engage in conversations with native speakers to practice speaking and listening in real-life situations."}]} 7 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- I plan to enhance my culinary skills by trying out new recipes and cooking techniques. I will set aside time each week to experiment with different cuisines and expand my cooking repertoire."}, {"role": "assistant", "content": "- Research and collect new recipes and cooking techniques from various sources such as cookbooks, cooking websites, and cooking shows.\n- Set aside a specific day and time each week dedicated to experimenting with new recipes and cooking techniques.\n- Create a list of different cuisines and dishes to try, ensuring a diverse range of cooking experiences.\n- Gather necessary ingredients and kitchen tools for each new recipe or cooking technique.\n- Document the results of each cooking experiment, noting successes and areas for improvement.\n- Continuously seek feedback from friends and family to improve culinary skills.\n- Reflect on the progress and adjust the plan as needed to expand the cooking repertoire."}]} 8 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- My goal for the upcoming weeks is to establish a daily meditation practice to promote mindfulness and reduce stress. 
I will start with short sessions and gradually increase the duration to incorporate meditation into my daily routine."}, {"role": "assistant", "content": "- Set a specific time each day for meditation\n- Start with short 5-10 minute meditation sessions\n- Find a quiet and comfortable space for meditation\n- Use guided meditation apps or videos to help with the practice\n- Gradually increase the duration of meditation sessions\n- Keep a journal to track progress and experiences with meditation\n- Stay consistent and committed to the daily practice"}]} 9 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- I aim to improve my time management skills by creating a detailed schedule for each day and prioritizing tasks based on their importance and deadlines. I will use time-tracking tools to monitor my progress and make adjustments as needed."}, {"role": "assistant", "content": "- Create a detailed schedule for each day\n- Prioritize tasks based on importance and deadlines\n- Use time-tracking tools to monitor progress\n- Make adjustments to the schedule as needed\n- Set specific goals for improving time management skills"}]} 10 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- I have set a goal to read at least two books per month, and I will dedicate time each day to immerse myself in literature and expand my knowledge and imagination."}, {"role": "assistant", "content": "- Schedule dedicated time each day for reading\n- Create a list of books to read for the month\n- Set aside time for book selection and research\n- Track progress and adjust reading schedule as needed\n- Find a comfortable and quiet reading space\n- Take notes and reflect on the books read\n- Explore different genres and authors to expand knowledge and imagination"}]} 11 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- My task for the upcoming weeks is to develop a personal finance plan to save money and achieve my financial goals. I will review my expenses, create a budget, and explore investment opportunities to secure my financial future."}, {"role": "assistant", "content": "- Review expenses\n- Create a budget\n- Explore investment opportunities\n- Develop a personal finance plan\n- Save money\n- Achieve financial goals\n- Secure financial future"}]} 12 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- I plan to enhance my professional skills by enrolling in an online course related to my field of work. I will dedicate time each day to study the course materials and apply the new knowledge to my job."}, {"role": "assistant", "content": "- Research and select a relevant online course related to my field of work\n- Enroll in the chosen online course\n- Set aside dedicated time each day to study the course materials\n- Take notes and actively engage with the course content\n- Apply the new knowledge gained from the course to my job tasks and projects"}]} 13 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- My goal for the upcoming weeks is to improve my networking skills by reaching out to industry professionals and attending virtual networking events. 
I will set specific goals for each interaction and follow up with contacts to build meaningful connections."}, {"role": "assistant", "content": "- Research industry professionals and identify potential contacts\n- Reach out to industry professionals via email or LinkedIn\n- Set specific goals for each networking interaction, such as learning about a specific aspect of the industry or gaining insights into a particular company\n- Attend virtual networking events and actively participate in discussions\n- Follow up with contacts after networking interactions to express gratitude and continue building the relationship\n- Keep track of networking activities and connections made for future reference and follow-up"}]} 14 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- I aim to prioritize self-care by incorporating regular exercise, healthy eating, and relaxation techniques into my daily routine. I will create a self-care plan and commit to practicing self-care activities consistently."}, {"role": "assistant", "content": "- Create a self-care plan\n- Schedule regular exercise sessions\n- Plan and prepare healthy meals\n- Incorporate relaxation techniques into daily routine\n- Commit to practicing self-care activities consistently"}]} 15 | {"messages": [{"role": "system", "content": "You are a helpful planning assistant"}, {"role": "user", "content": "- I have set a goal to volunteer at a local charity organization and contribute to meaningful causes in my community. I will research volunteer opportunities and dedicate my time and skills to make a positive impact."}, {"role": "assistant", "content": "- Research local charity organizations and their volunteer opportunities\n- Contact the chosen charity organization to inquire about their volunteer needs and application process\n- Schedule a time to volunteer and commit to the agreed-upon schedule\n- Prepare necessary skills or training required for the volunteer work\n- Dedicate time and effort to contribute to the meaningful causes of the charity organization\n- Reflect on the impact made and consider further involvement or support"}]} 16 | -------------------------------------------------------------------------------- /notebooks/demo-apps.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | st.set_page_config( 4 | page_title="Educational Demo Apps", 5 | page_icon="🎓", 6 | layout="wide" 7 | ) 8 | 9 | st.title("🎓 Educational Demo Applications") 10 | st.sidebar.success("Select a demo from the sidebar above.") 11 | 12 | st.markdown(""" 13 | ## Welcome to the AI-Powered Educational Tools Collection! 14 | 15 | This collection showcases various applications of AI in education and content management. 16 | 17 | ### Available Demos: 18 | 19 | #### 📊 Quiz App 20 | - AI-powered quiz generation from any content 21 | - Multiple-choice question format 22 | - Score tracking and performance visualization 23 | - Export results to CSV 24 | 25 | #### 🌎 Translation Hub 26 | - Multi-language translation capabilities 27 | - Support for 6 different languages 28 | - Parallel translation viewing 29 | - Professional-grade translations using AI 30 | 31 | #### 📚 PDF Summarizer 32 | - Multi-level text summarization 33 | - Various summary styles (Narrative, Bullet Points, etc.) 
34 | - Interactive length adjustment 35 | - Summary statistics and visualization 36 | - PDF document support 37 | 38 | #### 💬 Q&A System 39 | - Interactive question-answering 40 | - Context-aware responses 41 | - Document-based knowledge base 42 | - Customer support simulation 43 | 44 | ### Getting Started 45 | 1. Select any demo from the sidebar menu 👈 46 | 2. Each app will require an OpenAI API key 47 | 3. Follow the in-app instructions for specific features 48 | 49 | ### Note 50 | These demos are educational tools designed to showcase the capabilities of AI in various educational contexts. They're built using Streamlit and powered by OpenAI's language models. 51 | """) 52 | 53 | # Add a footer with additional information 54 | st.markdown("---") 55 | st.markdown(""" 56 |
<div style="text-align: center">
57 | Built with Streamlit and OpenAI • For Educational Purposes
58 | </div>
59 | """, unsafe_allow_html=True) -------------------------------------------------------------------------------- /notebooks/docs/james.txt: -------------------------------------------------------------------------------- 1 | James is a tall, athletic man in his early thirties, with a cheerful disposition 2 | that lights up the room. Born and raised in the bustling city of San Francisco, 3 | he made the unlikely transition to rural life when he adopted an energetic Border Collie named Laddie. 4 | The duo is often spotted trekking the hilly terrain, with James capturing the beauty of nature through his lens. 5 | James is a professional wildlife photographer, and he considers Laddie not just his pet, but his partner. The collaboration of this man and his dog have led to some of the most breathtaking shots that perfectly encapsulate the spirit of the wilderness. -------------------------------------------------------------------------------- /notebooks/docs/maria.txt: -------------------------------------------------------------------------------- 1 | Maria is a confident, passionate woman in her forties 2 | with a fiery spirit that is mirrored by her Red Siberian Husky, Blaze. Originally from the chilly landscapes of Alaska, Maria found her calling as a professional musher. With Blaze leading her team, Maria competes in numerous sled races each year. Their formidable partnership has not only earned them several championships but also led to a unique bond that transcends beyond the icy trails and echoes in their shared love for the Alaskan wilderness. -------------------------------------------------------------------------------- /notebooks/docs/robert.txt: -------------------------------------------------------------------------------- 1 | Robert, an intellectual man in his sixties, possesses a calm aura and a curiosity that keeps him forever youthful. A retired professor from Boston, Robert found a new companion in his old age - a wise-looking Basset Hound named Aristotle. Robert spends his time penning philosophy books, with Aristotle always snoozing by his feet. Their peaceful existence is a perfect blend of companionship and intellectual pursuit, where every day is a philosophical conversation with his silent, yet understanding friend. -------------------------------------------------------------------------------- /notebooks/docs/sofia.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/docs/sofia.txt -------------------------------------------------------------------------------- /notebooks/docs/tom.txt: -------------------------------------------------------------------------------- 1 | Tom, a soft-spoken man with a kind smile and gentle eyes, 2 | hails from the heartland of Kansas. His loyal companion is a Golden Retriever named Sunny, reflecting the dog's vibrant personality and sunny disposition. Tom is a dedicated farmer, lovingly tending to the wheat fields that have been in his family for generations. Sunny is always at Tom's side, whether it's chasing birds or lazing under the Kansas sun. Their bond is a testament to the enduring friendship between a man and his dog, reflected in the golden hues of the fields they call home. 
-------------------------------------------------------------------------------- /notebooks/example-notes-files/note1.txt: -------------------------------------------------------------------------------- 1 | **Personal Note with Reminders** 2 | 3 | --- 4 | 5 | **Date:** [Insert Date] 6 | 7 | **Dear [Your Name],** 8 | 9 | Here are some important reminders and notes for the upcoming days. Stay organized and make time for what matters! 10 | 11 | --- 12 | 13 | **To-Do List:** 14 | 15 | 1. **Work Tasks:** 16 | - [ ] Submit project report by **Friday at 5 PM** 17 | - [ ] Prepare for team meeting on **Monday** (notes and presentation) 18 | - [ ] Follow up with [Colleague's Name] regarding [specific issue] 19 | 20 | 2. **Personal Errands:** 21 | - [ ] Grocery shopping on **Saturday morning** 22 | - [ ] Pick up dry cleaning on **Saturday afternoon** 23 | - [ ] Schedule dentist appointment for [Date] 24 | 25 | 3. **Family & Friends:** 26 | - [ ] Call Mom on **Sunday** to check in 27 | - [ ] Plan movie night with [Friend's Name] for **next week** 28 | - [ ] Send birthday card to [Friend/Family Member] by [Date] 29 | 30 | 4. **Self-Care:** 31 | - [ ] Exercise at least **3 times** this week 32 | - [ ] Set aside **30 minutes** for reading daily 33 | - [ ] Try the new recipe for dinner on **Thursday** 34 | 35 | 5. **Upcoming Events:** 36 | - **[Event 1]**: Date, time, and location 37 | - **[Event 2]**: Date, time, and location 38 | - **[Special Occasion/Reminder]**: Date and what to prepare 39 | 40 | --- 41 | 42 | **Motivational Note:** 43 | “Remember to take a deep breath—progress, not perfection. You're doing great!” 44 | 45 | --- 46 | 47 | **Ideas to Explore:** 48 | - [ ] Take a pottery class 49 | - [ ] Start a new book series 50 | - [ ] Plan a weekend getaway for [Month] 51 | 52 | --- 53 | 54 | **End of Note** 55 | 56 | Stay focused, and don’t forget to celebrate your small victories! 57 | 58 | --- 59 | 60 | Feel free to customize this note as needed! 61 | -------------------------------------------------------------------------------- /notebooks/example-notes-files/note2.txt: -------------------------------------------------------------------------------- 1 | Certainly! Below is an example of a synthetic data invoice for the purchase of a large quantity of pancakes. 2 | 3 | --- 4 | 5 | **INVOICE** 6 | 7 | **Seller:** 8 | Fluffy Pancake Co. 9 | 123 Breakfast Blvd. 10 | Pancake City, PC 12345 11 | Phone: (555) 123-4567 12 | Email: sales@fluffypancakes.com 13 | 14 | **Buyer:** 15 | Giant Breakfast Warehouse 16 | 456 Meal St. 
17 | Foodland, FL 67890
18 | Phone: (555) 987-6543
19 | Email: orders@giantbreakfast.com
20 |
21 | ---
22 |
23 | **Invoice Number:** 2023-INV-98765
24 | **Invoice Date:** October 22, 2023
25 | **Due Date:** November 5, 2023
26 |
27 | ---
28 |
29 | **Description of Goods:**
30 |
31 | | Item Description | Quantity | Unit Price ($) | Total ($) |
32 | |--------------------------------------|----------|----------------|-----------------|
33 | | Fluffy Classic Pancakes (Frozen) | 10,000 | 0.50 | 5,000.00 |
34 | | Strawberry Delight Pancakes (Frozen) | 5,000 | 0.70 | 3,500.00 |
35 | | Blueberry Bliss Pancakes (Frozen) | 5,000 | 0.75 | 3,750.00 |
36 | | Chocolate Chip Pancakes (Frozen) | 5,000 | 0.80 | 4,000.00 |
37 |
38 | ---
39 |
40 | **Subtotal:** $16,250.00
41 | **Sales Tax (8%):** $1,300.00
42 | **Shipping & Handling:** $200.00
43 | **Total Amount Due:** **$17,750.00**
44 |
45 | ---
46 |
47 | **Payment Instructions:**
48 | Please make the payment to the following account:
49 | Bank: Pancake National Bank
50 | Account Name: Fluffy Pancake Co.
51 | Account Number: 123456789
52 | Routing Number: 987654321
53 |
54 | ---
55 |
56 | **Notes:**
57 | Thank you for your order! Please allow 5-7 business days for processing and delivery. If you have any questions about this invoice, feel free to contact us.
58 |
59 | ---
60 |
61 | **End of Invoice**
62 |
63 | ---
64 |
65 | This example includes all necessary components of an invoice while creatively illustrating the purchase of a sizable quantity of pancakes.
66 |
--------------------------------------------------------------------------------
/notebooks/example-notes-files/paper-pancakes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/example-notes-files/paper-pancakes.pdf
--------------------------------------------------------------------------------
/notebooks/extra-notebooks/2.4-prompt-engineering-techniques-knowledge-generation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "!pip install openai\n",
10 |     "!pip install python-dotenv"
11 |    ]
12 |   },
13 |   {
14 |    "cell_type": "code",
15 |    "execution_count": null,
16 |    "metadata": {},
17 |    "outputs": [],
18 |    "source": [
19 |     "import os\n",
20 |     "# loading from a .env file (requires: from dotenv import load_dotenv)\n",
21 |     "# load_dotenv(dotenv_path=\"/full/path/to/your/.env\")\n",
22 |     "\n",
23 |     "# or \n",
24 |     "# if you're on google colab just uncomment below and replace with your openai api key\n",
25 |     "# os.environ[\"OPENAI_API_KEY\"] = \"\""
26 |    ]
27 |   },
28 |   {
29 |    "cell_type": "markdown",
30 |    "metadata": {},
31 |    "source": [
32 |     "# Prompt Engineering Technique: Knowledge Generation\n",
33 |     "\n",
34 |     "\n",
35 |     "There are two basic steps:\n",
36 |     "1. Prompt to generate knowledge or facts about the problem at hand\n",
37 |     "\n",
38 |     "2. 
Prompt integrating your question with the facts that were generated to elicit better performance "
39 |    ]
40 |   },
41 |   {
42 |    "cell_type": "code",
43 |    "execution_count": 9,
44 |    "metadata": {},
45 |    "outputs": [],
46 |    "source": [
47 |     "from openai import OpenAI\n",
48 |     "client = OpenAI()  # openai>=1.0 client API; the legacy openai.ChatCompletion was removed\n",
49 |     "def get_response(prompt_question, model_name=\"gpt-4o-mini\", system_message=\"You are a helpful assistant\"):\n",
50 |     "    response = client.chat.completions.create(\n",
51 |     "        model=model_name,\n",
52 |     "        messages=[{\"role\": \"system\", \"content\": system_message},\n",
53 |     "                  {\"role\": \"user\", \"content\": prompt_question}]\n",
54 |     "    )\n",
55 |     "\n",
56 |     "    return response.choices[0].message.content"
57 |    ]
58 |   },
59 |   {
60 |    "cell_type": "code",
61 |    "execution_count": 8,
62 |    "metadata": {},
63 |    "outputs": [
64 |     {
65 |      "data": {
66 |       "text/plain": [
67 |        "'The approximate sum of the populations of France, Portugal, Germany, and Japan is around 269 million people.'"
68 |       ]
69 |      },
70 |      "execution_count": 8,
71 |      "metadata": {},
72 |      "output_type": "execute_result"
73 |     }
74 |    ],
75 |    "source": [
76 |     "question = \"What is the approximate sum of the populations of France, Portugal, Germany and Japan?\"\n",
77 |     "# japan - 127\n",
78 |     "# portugal - 10\n",
79 |     "# germany - 83\n",
80 |     "# france - 67\n",
81 |     "get_response(question)"
82 |    ]
83 |   },
84 |   {
85 |    "cell_type": "markdown",
86 |    "metadata": {},
87 |    "source": [
88 |     "This answer is incorrect given that Japan's population in 2021 was around 125 million. Let's see if we can use the generate-knowledge approach to improve this answer."
89 |    ]
90 |   },
91 |   {
92 |    "cell_type": "code",
93 |    "execution_count": 10,
94 |    "metadata": {},
95 |    "outputs": [
96 |     {
97 |      "data": {
98 |       "text/plain": [
99 |        "'The approximate sum of the populations of France, Portugal, Germany, and Japan is 286.3 million.'"
100 |       ]
101 |      },
102 |      "execution_count": 10,
103 |      "metadata": {},
104 |      "output_type": "execute_result"
105 |     }
106 |    ],
107 |    "source": [
108 |     "prompt_generate_knowledge = \"Generate some demographic data for the following countries: France, Portugal, Germany and Japan.\"\n",
109 |     "\n",
110 |     "output_generate_knowledge = get_response(prompt_generate_knowledge)\n",
111 |     "\n",
112 |     "prompt_integrate_knowledge_question = f\"Integrate the information from here:{output_generate_knowledge} with what you already know and answer this question: {question}\"\n",
113 |     "\n",
114 |     "output_integrate_knowledge_question = get_response(prompt_integrate_knowledge_question)\n",
115 |     "\n",
116 |     "output_integrate_knowledge_question"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "markdown",
121 |    "metadata": {},
122 |    "source": [
123 |     "Yep! 
This answer is much better and close to the correct approximation of the aforementioned populations."
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "metadata": {},
129 |    "source": [
130 |     "# References\n",
131 |     "- https://arxiv.org/pdf/2110.08387.pdf"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "markdown",
136 |    "metadata": {},
137 |    "source": []
138 |   }
139 |  ],
140 |  "metadata": {
141 |   "kernelspec": {
142 |    "display_name": "oreilly_llama2",
143 |    "language": "python",
144 |    "name": "oreilly_llama2"
145 |   },
146 |   "language_info": {
147 |    "codemirror_mode": {
148 |     "name": "ipython",
149 |     "version": 3
150 |    },
151 |    "file_extension": ".py",
152 |    "mimetype": "text/x-python",
153 |    "name": "python",
154 |    "nbconvert_exporter": "python",
155 |    "pygments_lexer": "ipython3",
156 |    "version": "3.11.5"
157 |   }
158 |  },
159 |  "nbformat": 4,
160 |  "nbformat_minor": 2
161 | }
162 |
--------------------------------------------------------------------------------
/notebooks/extra-notebooks/quiz-app-new.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import streamlit as st
4 | from langchain.prompts import ChatPromptTemplate
5 | from langchain_openai import ChatOpenAI
6 | from langchain.schema import StrOutputParser
7 |
8 | # Set the default encoding to utf-8
9 | os.environ['PYTHONIOENCODING'] = 'utf-8'
10 | sys.stdout.reconfigure(encoding='utf-8')
11 | sys.stderr.reconfigure(encoding='utf-8')
12 |
13 | def create_the_quiz_prompt_template():
14 |     template = """
15 | You are an expert quiz maker for technical fields. Let's think step by step and
16 | create a quiz with {num_questions} {quiz_type} questions about the following concept/content: {quiz_context}.
17 |
18 | The format of the quiz could be one of the following:
19 | - Multiple-choice:
20 | - Questions:
21 |     <Question1>: <a. Answer 1>, <b. Answer 2>, <c. Answer 3>, <d. Answer 4>
22 |     <Question2>: <a. Answer 1>, <b. Answer 2>, <c. Answer 3>, <d. Answer 4>
23 |     ....
24 | - Answers:
25 |     <Answer1>: <a|b|c|d>
26 |     <Answer2>: <a|b|c|d>
27 |     ....
28 |     Example:
29 |     - Questions:
30 |     - 1. What is the time complexity of a binary search tree?
31 |         a. O(n)
32 |         b. O(log n)
33 |         c. O(n^2)
34 |         d. O(1)
35 |     - Answers:
36 |         1. b
37 | - True-false:
38 |     - Questions:
39 |         <Question1>: <True|False>
40 |         <Question2>: <True|False>
41 |         .....
42 |     - Answers:
43 |         <Answer1>: <True|False>
44 |         <Answer2>: <True|False>
45 |         .....
46 |     Example:
47 |     - Questions:
48 |         - 1. What is a binary search tree?
49 |         - 2. How are binary search trees implemented?
50 |     - Answers:
51 |         - 1. True
52 |         - 2. False
53 | - Open-ended:
54 | - Questions:
55 |     <Question1>:
56 |     <Question2>:
57 | - Answers:
58 |     <Answer1>:
59 |     <Answer2>:
60 |     Example:
61 |     Questions:
62 |     - 1. What is a binary search tree?
63 |     - 2. How are binary search trees implemented?
64 |
65 |     - Answers:
66 |         1. A binary search tree is a data structure that is used to store data in a sorted manner.
67 |         2. Binary search trees are implemented using linked lists.
68 | """ 69 | prompt = ChatPromptTemplate.from_template(template) 70 | prompt.format(num_questions=3, quiz_type="multiple-choice", quiz_context="Data Structures in Python Programming") 71 | 72 | return prompt 73 | 74 | 75 | def create_quiz_chain(prompt_template, llm, openai_api_key): 76 | return prompt_template | llm | StrOutputParser() 77 | 78 | def split_questions_answers(quiz_response): 79 | parts = quiz_response.split("- Answers:") 80 | if len(parts) < 2: 81 | print("Error: quiz_response does not contain '- Answers:' marker.") 82 | return [], {} 83 | 84 | questions_parts = parts[0].split("- Questions:") 85 | if len(questions_parts) < 2: 86 | print("Error: quiz_response does not contain '- Questions:' marker.") 87 | return [], {} 88 | 89 | questions_part = questions_parts[1] 90 | answers_part = parts[1] 91 | 92 | questions = [q.strip() for q in questions_part.split("\n") if q.strip() != ""] 93 | answers = [a.strip() for a in answers_part.split("\n") if a.strip() != ""] 94 | 95 | structured_questions = [] 96 | for question in questions: 97 | if ':' in question: 98 | question_parts = question.split(":") 99 | q_text = question_parts[0].strip() 100 | q_options = ':'.join(question_parts[1:]).strip() 101 | structured_questions.append({"question": q_text, "options": q_options}) 102 | else: 103 | print(f"Skipping malformed question: {question}") 104 | 105 | structured_answers = {} 106 | for answer in answers: 107 | question_num, correct_option = answer.split(".") 108 | structured_answers[question_num.strip()] = correct_option.strip() 109 | 110 | return structured_questions, structured_answers 111 | 112 | 113 | def main(): 114 | st.title("Quiz App") 115 | st.write("This app generates a quiz based on a given context.") 116 | 117 | openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password") 118 | if openai_api_key != "": 119 | os.environ["OPENAI_API_KEY"] = openai_api_key 120 | else: 121 | st.error("Please enter your OpenAI API key") 122 | return 123 | 124 | prompt_template = create_the_quiz_prompt_template() 125 | llm = ChatOpenAI(temperature=0.0, model="gpt-4o") 126 | chain = create_quiz_chain(prompt_template, llm, openai_api_key) 127 | 128 | context = st.text_area("Enter the concept/context for the quiz") 129 | num_questions = st.number_input("Enter the number of questions", min_value=1, max_value=10, value=3) 130 | quiz_type = st.selectbox("Select the quiz type", ["multiple-choice", "true-false", "open-ended"]) 131 | 132 | if st.button("Generate Quiz"): 133 | quiz_response = chain.invoke({"quiz_type": quiz_type, "num_questions": num_questions, "quiz_context": context}) 134 | structured_questions, structured_answers = split_questions_answers(quiz_response) 135 | 136 | user_answers = {} 137 | 138 | if quiz_type == "multiple-choice": 139 | for question in structured_questions: 140 | q_num = question["question"].split()[0] 141 | st.write(f'{question["question"]}:') 142 | for option in question["options"].split(","): 143 | option_key = f"{q_num}_{option.strip()[0]}" 144 | user_answers[option_key] = st.checkbox(option.strip(), key=option_key) 145 | 146 | st.session_state["user_answers"] = user_answers 147 | st.session_state["structured_questions"] = structured_questions 148 | st.session_state["correct_answers"] = structured_answers 149 | 150 | if st.button("Check Answers"): 151 | score = 0 152 | for q_num, correct_answer in st.session_state["correct_answers"].items(): 153 | if st.session_state["user_answers"].get(f"{q_num}_{correct_answer}", False): 154 | score += 1 155 | 
156 | total_questions = len(st.session_state["correct_answers"]) 157 | st.success(f"Your score is {score}/{total_questions}!") 158 | 159 | if __name__ == "__main__": 160 | main() -------------------------------------------------------------------------------- /notebooks/extracted_ticket_issues.csv: -------------------------------------------------------------------------------- 1 | customer_name,issue_description,priority 2 | Jane Doe,Customer was charged twice for the same transaction.,High 3 | John Smith,"Customer unable to log into their account, facing a password reset issue.",Medium 4 | Alice Johnson,Customer wants more information about product specifications.,Low 5 | Bob Brown,"Customer has not received the order yet, tracking information shows a delay.",High 6 | Michael Lee,Customer wants to return a product and needs assistance with the return label.,Medium 7 | -------------------------------------------------------------------------------- /notebooks/images/2023-07-17-12-48-28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-17-12-48-28.png -------------------------------------------------------------------------------- /notebooks/images/2023-07-17-12-55-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-17-12-55-18.png -------------------------------------------------------------------------------- /notebooks/images/2023-07-17-12-55-35.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-17-12-55-35.png -------------------------------------------------------------------------------- /notebooks/images/2023-07-17-12-59-36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-17-12-59-36.png -------------------------------------------------------------------------------- /notebooks/images/2023-07-17-13-00-49.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-17-13-00-49.png -------------------------------------------------------------------------------- /notebooks/images/2023-07-21-15-55-05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-21-15-55-05.png -------------------------------------------------------------------------------- /notebooks/images/2023-07-24-10-52-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-24-10-52-10.png -------------------------------------------------------------------------------- 
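--------------------------------------------------------------------------------
A note on the split_questions_answers helper in quiz-app-new.py above: it splits each answer line with answer.split("."), which raises a ValueError whenever an answer contains more than one period, as the open-ended answers in that file's own template do. A more defensive sketch of that parsing step, assuming only the "- Questions:" / "- Answers:" markers from the template; the simplified return shape (raw question lines plus a number-to-answer dict) is an assumption of this sketch, not the original's exact structure:

def split_questions_answers(quiz_response: str):
    # Bail out early if the model did not follow the expected format
    if "- Questions:" not in quiz_response or "- Answers:" not in quiz_response:
        return [], {}
    body, answers_part = quiz_response.split("- Answers:", 1)
    questions_part = body.split("- Questions:", 1)[1]
    questions = [q.strip() for q in questions_part.splitlines() if q.strip()]
    answers = {}
    for line in answers_part.splitlines():
        line = line.strip().lstrip("-").strip()
        if not line:
            continue
        # Split on the FIRST dot only, so "1. A binary search tree is..." survives
        num, _, rest = line.partition(".")
        if num.strip().isdigit():
            answers[num.strip()] = rest.strip()
    return questions, answers
--------------------------------------------------------------------------------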
/notebooks/images/2023-07-28-18-38-27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-28-18-38-27.png -------------------------------------------------------------------------------- /notebooks/images/2023-07-30-19-29-27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-30-19-29-27.png -------------------------------------------------------------------------------- /notebooks/images/2023-07-30-19-32-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-30-19-32-13.png -------------------------------------------------------------------------------- /notebooks/images/2023-07-30-19-34-48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-07-30-19-34-48.png -------------------------------------------------------------------------------- /notebooks/images/2023-08-17-14-48-39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/2023-08-17-14-48-39.png -------------------------------------------------------------------------------- /notebooks/images/ChatGPT-statistical-model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/ChatGPT-statistical-model.jpg -------------------------------------------------------------------------------- /notebooks/images/ChatGPT-statistical-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/ChatGPT-statistical-model.png -------------------------------------------------------------------------------- /notebooks/images/Notebook_4-dynamic_prompt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/Notebook_4-dynamic_prompt.png -------------------------------------------------------------------------------- /notebooks/images/Notebook_4-prompt_chaining.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/Notebook_4-prompt_chaining.png -------------------------------------------------------------------------------- /notebooks/images/ToT_framework.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/ToT_framework.png -------------------------------------------------------------------------------- /notebooks/images/backend_llm_app1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/backend_llm_app1.png -------------------------------------------------------------------------------- /notebooks/images/backend_llm_app2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/backend_llm_app2.png -------------------------------------------------------------------------------- /notebooks/images/bear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/bear.png -------------------------------------------------------------------------------- /notebooks/images/bigram-transition-probs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/bigram-transition-probs.png -------------------------------------------------------------------------------- /notebooks/images/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/cat.png -------------------------------------------------------------------------------- /notebooks/images/embeddings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/embeddings.png -------------------------------------------------------------------------------- /notebooks/images/embeddings2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/embeddings2.png -------------------------------------------------------------------------------- /notebooks/images/frontend_backend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/frontend_backend.png -------------------------------------------------------------------------------- /notebooks/images/frontend_llm_app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/frontend_llm_app.png -------------------------------------------------------------------------------- /notebooks/images/owl.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/owl.png -------------------------------------------------------------------------------- /notebooks/images/prob_distribution_over_next_words.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/prob_distribution_over_next_words.png -------------------------------------------------------------------------------- /notebooks/images/prompt_ai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/prompt_ai.png -------------------------------------------------------------------------------- /notebooks/images/sheep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/sheep.png -------------------------------------------------------------------------------- /notebooks/images/transition_probs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/notebooks/images/transition_probs.png -------------------------------------------------------------------------------- /notebooks/langchain-deploy.py: -------------------------------------------------------------------------------- 1 | # source: https://python.langchain.com/docs/tutorials/llm_chain/#:~:text=Server%E2%80%8B,with%20langserve.add_routes 2 | #!/usr/bin/env python 3 | from fastapi import FastAPI 4 | from langchain_core.prompts import ChatPromptTemplate 5 | from langchain_core.output_parsers import StrOutputParser 6 | from langchain_openai import ChatOpenAI 7 | from langserve import add_routes 8 | import os 9 | 10 | os.environ["LANGCHAIN_TRACING_V2"] = "true" 11 | os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com" 12 | # os.environ["LANGCHAIN_API_KEY"] = "YOUR API KEY" 13 | os.environ["LANGCHAIN_PROJECT"] = "langchain-chatgpt-course-test" 14 | 15 | # 1. Create prompt template 16 | system_template = "Translate the following into {language}:" 17 | prompt_template = ChatPromptTemplate.from_messages([ 18 | ('system', system_template), 19 | ('user', '{text}') 20 | ]) 21 | 22 | # 2. Create model 23 | model = ChatOpenAI(model="gpt-4o-mini", temperature=0) 24 | 25 | # 3. Create parser 26 | parser = StrOutputParser() 27 | 28 | # 4. Create chain 29 | chain = prompt_template | model | parser 30 | 31 | 32 | # 4. App definition 33 | app = FastAPI(title="LangChain Server", 34 | version="1.0", 35 | description="A simple API server using LangChain's Runnable interfaces for translation.",) 36 | # 5. 
Adding chain route 37 | add_routes(app,chain,path="/chain",) 38 | 39 | if __name__ == "__main__": 40 | import uvicorn 41 | 42 | uvicorn.run(app, host="localhost", port=8000) -------------------------------------------------------------------------------- /notebooks/notes.txt: -------------------------------------------------------------------------------- 1 | Title: Project Launch Meeting 2 | 3 | Date: July 23, 2023 4 | 5 | Attendees: John Doe, Jane Smith, Michael Brown, Lisa Johnson 6 | 7 | Summary: 8 | 9 | Today we held the kickoff meeting for our new project, CodeName X. The team is very excited to get started. John Doe gave an overview of the project, explaining that the goal is to develop a new AI model that will significantly improve our product's capabilities. The estimated timeline for the project is 18 months, but John stressed that this is a rough estimate and will likely be refined as we delve deeper into the project. 10 | 11 | Jane Smith spoke about the resources we have at our disposal, including a robust development team and state-of-the-art AI training facilities. She also mentioned the possibility of bringing in some external consultants for specific tasks, which was met with approval from the rest of the team. 12 | 13 | Michael Brown, our lead developer, gave a technical overview of how we might approach the development of the AI model. He proposed a two-stage process: an initial exploratory phase to investigate different methods, followed by a focused development phase where we will construct and fine-tune the final model. 14 | 15 | Lisa Johnson talked about project management and highlighted the importance of maintaining good communication throughout the project. She suggested using Agile methodology to manage the project, which the team agreed upon. 16 | 17 | Next Steps: 18 | 19 | John will send out a project brief by the end of this week. Jane will look into potential consultants we might want to bring on board. Michael will begin setting up a sandbox for the development team to use during the exploratory phase. Lisa will set up the project in our project management tool and will schedule a project planning meeting for next week. 20 | 21 | -------------------------------------------------------------------------------- /notebooks/notes_summarizer_app.py: -------------------------------------------------------------------------------- 1 | from langchain.prompts import PromptTemplate 2 | from langchain.chat_models import ChatOpenAI 3 | from langchain.chains.summarize import load_summarize_chain 4 | from langchain.document_loaders import TextLoader 5 | from langchain.schema import Document 6 | import os 7 | import requests 8 | import streamlit as st 9 | 10 | def load_text_file(uploaded_file): 11 | content = uploaded_file.read().decode("utf-8") 12 | content = [Document(page_content=content)] 13 | return content 14 | 15 | 16 | def setup_summarization_chain(docs,openai_api_key): 17 | if openai_api_key != "": 18 | os.environ["OPENAI_API_KEY"] = openai_api_key 19 | else: 20 | st.error("Please enter your OpenAI API key") 21 | return 22 | llm = ChatOpenAI() 23 | prompt_template = """Write a concise summary of the following text: 24 | 25 | {text} 26 | 27 | The summary should be in this style: {style}. 28 | Let's think step by step. 
29 |
30 | SUMMARY:"""
31 |
32 |     prompt_summarization = PromptTemplate(template=prompt_template,
33 |                                           input_variables=["text", "style"])
34 |
35 |     chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt_summarization)
36 |     return chain
37 |
38 | def summarize_content(docs, chain, summary_option):
39 |     """
40 |     Runs the summarization chain with
41 |     the option specified by the user.
42 |     """
43 |     summary = chain.run({"input_documents": docs, "style": summary_option})
44 |     return summary
45 |
46 | def main():
47 |     st.title('Notes Summarizer using ChatGPT')
48 |     # Option to upload a file or input text directly
49 |     openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password")
50 |     option = st.sidebar.radio("Choose an input method:", ["Upload a .txt file", "Input text directly"])
51 |     if option == "Upload a .txt file":
52 |         uploaded_file = st.file_uploader("Choose a .txt file", type=['txt'])
53 |         docs = ""
54 |         if uploaded_file:
55 |             docs = load_text_file(uploaded_file)
56 |     else:
57 |         docs_text = st.text_area("Enter your text here")
58 |         docs = [Document(page_content=docs_text)]
59 |
60 |     summary_option = st.sidebar.selectbox(
61 |         'How do you want your summary?',
62 |         ('Simple', 'Bullet points', 'Introduction, Argument, Conclusion')
63 |     )
64 |
65 |     if docs:
66 |         chain = setup_summarization_chain(docs,openai_api_key)
67 |
68 |         if st.sidebar.button('Summarize'):
69 |             st.session_state.summary = summarize_content(docs, chain, summary_option)
70 |
71 |     if 'summary' in st.session_state and st.session_state.summary:
72 |         st.write('Your Summary: \n', st.session_state.summary)
73 |         if st.sidebar.button('Download Summary'):
74 |             with open('summary.txt', 'w') as f:
75 |                 f.write(st.session_state.summary)
76 |
77 |             st.write("Your summary was downloaded to summary.txt")
78 |
79 |
80 | if __name__ == "__main__":
81 |     main()
--------------------------------------------------------------------------------
/notebooks/pages/demo-app-panda-letters.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from openai import OpenAI
3 | import os
4 | import wget
5 |
6 | if os.environ.get("OPENAI_API_KEY") is None:
7 |     openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password")
8 |     os.environ["OPENAI_API_KEY"] = openai_api_key
9 |
10 | client = OpenAI()
11 |
12 |
13 | def generate_image(prompt: str, filename: str = "output_image.png", size: str = "1024x1024"):
14 |     """
15 |     Generates an image using the DALL·E 3 model and saves it to a file.
16 |
17 |     Args:
18 |         prompt (str): The text prompt to generate the image.
19 |         filename (str): The output filename for saving the image.
20 |         size (str): The size of the generated image (default: 1024x1024).
21 |
22 |     Returns:
23 |         str: The path to the saved image file.
24 |     """
25 |     # Check if file already exists
26 |     if os.path.exists(filename):
27 |         abs_path = os.path.abspath(filename)
28 |         print(f'Using existing image at {abs_path}')
29 |         return abs_path
30 |
31 |     # Generate the image using the DALL·E 3 model
32 |     response = client.images.generate(
33 |         model="dall-e-3",
34 |         prompt=prompt,
35 |         size=size, # Must be one of 1024x1024 , 1792x1024 , or 1024x1792 for dall-e-3 models.
36 | quality="standard", 37 | n=1 38 | ) 39 | 40 | image_url = response.data[0].url 41 | wget.download(image_url, filename) 42 | 43 | abs_path = os.path.abspath(filename) 44 | print(f'Saved image to {abs_path}') 45 | return abs_path 46 | 47 | def get_response(prompt): 48 | response = client.chat.completions.create(model="gpt-4o-mini", 49 | messages= 50 | [ 51 | {"role": "system", "content": "You are the most insanely creative writer of all times."}, 52 | {"role": "user", "content": prompt} 53 | ]) 54 | return response.choices[0].message.content 55 | 56 | 57 | st.title("The Panda Warrior") 58 | cover_image_character = st.sidebar.selectbox("Select your prefered character", ["Panda Warrior", "Punk Elmo", "Drunk OWL", "Funky Penguin"], index=0) 59 | 60 | 61 | def get_character_info(character: str) -> None: 62 | """Display the image for the selected character.""" 63 | if character == "Panda Warrior": 64 | image_path = "./assets-resources/panda_letter.png" 65 | else: 66 | st.write(f"Generating cover image for {character}...") 67 | prompt = f"Generate a creative outside the box like cover image for this character of a story: {character}." 68 | image_path = generate_image(prompt, filename=f"{character}_cover_image.png") 69 | 70 | st.image(image_path, width=400) 71 | 72 | prompt = f"Generate a funny and creative short backstory for this character: {character}. Make it one paragraph max." 73 | backstory = get_response(prompt) 74 | return backstory 75 | 76 | backstory = get_character_info(cover_image_character) 77 | 78 | if cover_image_character == "Panda Warrior": 79 | default_background = "A mighty panda warrior who trained in the bamboo forests of ancient China, mastering both martial arts and meditation. This noble creature now protects the forest and teaches young pandas the way of inner peace and outer strength." 80 | prompt_background = st.sidebar.text_area("Write a different background story for the panda", value=default_background) 81 | else: 82 | prompt_background = st.sidebar.text_area("Write a different background story for the panda", value=backstory) 83 | 84 | if st.button("Write Story"): 85 | if prompt_background != "": 86 | prompt = f"{prompt_background}. 
Write a super short story using the 3 act structure about this character:"
87 |         response = get_response(prompt)
88 |         st.write(response)
89 |
90 |
91 |
92 |
--------------------------------------------------------------------------------
/notebooks/pages/demo-app-qa.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | # source for this code mostly from: https://towardsdatascience.com/run-interactive-sessions-with-chatgpt-in-jupyter-notebook-87e00f2ee461
3 | import os
4 | import glob
5 | import requests
6 | __import__('pysqlite3')
7 | import sys
8 | sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
9 | from langchain.chains import ConversationalRetrievalChain
10 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings
11 | from langchain_community.document_loaders import DirectoryLoader
12 | from langchain_chroma import Chroma
13 | from langchain.prompts import ChatPromptTemplate
14 | from langchain.chains.combine_documents import create_stuff_documents_chain
15 | from langchain.chains.retrieval import create_retrieval_chain
16 | from datetime import datetime
17 |
18 | import chromadb
19 |
20 | chromadb.api.client.SharedSystemClient.clear_system_cache()
21 |
22 | st.session_state["chat_history"] = []
23 |
24 |
25 | DATA_DIR = "./customer-tickets"
26 |
27 | def qa_over_docs_setup(openai_api_key):
28 |     if openai_api_key != "":
29 |         os.environ["OPENAI_API_KEY"] = openai_api_key
30 |     else:
31 |         st.error("Please enter your OpenAI API key")
32 |         return
33 |
34 |     loader = DirectoryLoader(DATA_DIR, glob="*.txt")
35 |     txt_docs = loader.load_and_split()
36 |     embeddings = OpenAIEmbeddings()
37 |     vectordb = Chroma.from_documents(txt_docs, embeddings)
38 |     llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.0)
39 |     system_prompt = (
40 |         "You are a customer support assistant that answers questions about tickets. "
41 |         "Use the following pieces of retrieved context to answer "
42 |         "the question. If you don't know the answer, say that you "
43 |         "don't know. Use three sentences maximum and keep the "
44 |         "answer concise."
45 | "\n\n" 46 | "{context}") 47 | 48 | prompt = ChatPromptTemplate.from_messages( 49 | [ 50 | ("system", system_prompt), 51 | ("human", "{input}"), 52 | ]) 53 | 54 | # prompt 55 | qa_chain = create_stuff_documents_chain(llm, prompt) 56 | qa = create_retrieval_chain(vectordb.as_retriever(), qa_chain) 57 | 58 | return qa 59 | 60 | def show_documents(): 61 | st.subheader("Documents") 62 | for doc in glob.glob("./customer-tickets/*.txt"): 63 | with open(doc, "r") as f: 64 | st.write(doc) 65 | st.write(f.read()) 66 | 67 | def main(): 68 | st.title("Q&A") 69 | st.image("./assets-resources/owl-customer-support.png", width=400) 70 | openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password") 71 | show_docs = st.sidebar.checkbox("Show documents") 72 | chat_history = [] 73 | qa = qa_over_docs_setup(openai_api_key) 74 | question = st.text_input("What is your question?") 75 | 76 | if question!="": 77 | try: 78 | response = qa.invoke({"input": f"{question}"}) 79 | answer = response["answer"] 80 | st.session_state["chat_history"].append((question, answer)) 81 | except Exception as e: 82 | answer = "Error: " + str(e) 83 | 84 | st.write(answer) 85 | 86 | if show_docs: 87 | show_documents() 88 | 89 | if __name__ == "__main__": 90 | main() 91 | -------------------------------------------------------------------------------- /notebooks/pages/demo-app-translation.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from openai import OpenAI 3 | import os 4 | 5 | st.session_state["translated_text"] = "" 6 | # Initialize your OpenAI client 7 | 8 | 9 | # Define your language options 10 | languages = { 11 | "French": "fr", 12 | "Spanish": "es", 13 | "German": "de", 14 | "Italian": "it", 15 | "Japanese": "ja", 16 | "Portuguese": "pt", 17 | "Hindi": "hi", 18 | "Chinese": "zh" 19 | } 20 | 21 | def translate_text(input_text, target_language, openai_api_key): 22 | if openai_api_key != "": 23 | os.environ["OPENAI_API_KEY"] = openai_api_key 24 | else: 25 | st.error("Please enter your OpenAI API key") 26 | return 27 | 28 | client = OpenAI() 29 | # Here, you'll need to tailor the prompt to instruct ChatGPT to translate 30 | # Adjust the prompt according to your needs and the capabilities of the model 31 | prompt = f"Translate this into {target_language}: {input_text}" 32 | 33 | # Make the API call to OpenAI (adjust as per the actual API requirements) 34 | response = client.chat.completions.create(model="gpt-3.5-turbo-1106", 35 | messages= 36 | [ 37 | {"role": "system", "content": "You are a helpful assistant."}, 38 | {"role": "user", "content": prompt} 39 | ], 40 | temperature=0.0, 41 | n = 1 42 | ) 43 | 44 | # Extract the translated text from the response 45 | # This may need to be adjusted based on the actual response structure 46 | translated_text = response.choices[0].message.content.strip() 47 | 48 | return translated_text 49 | 50 | def main(): 51 | st.set_page_config(layout="wide") # Make the app full-width 52 | openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password") 53 | st.title("🌎 Multi-Language Translation Hub") 54 | 55 | # User input 56 | input_text = st.text_area("Enter text to translate", height=150) 57 | 58 | # Create a button to translate to all languages 59 | if st.button("Translate to Multiple Languages 🚀"): 60 | # Create two columns with 3 translations each 61 | col1, col2 = st.columns(2) 62 | 63 | # Dictionary to store translations 64 | translations = {} 65 | 66 | with 
st.spinner('Translating to all languages...'): 67 | # Generate all translations 68 | for lang_name, lang_code in languages.items(): 69 | translations[lang_name] = translate_text(input_text, lang_code, openai_api_key) 70 | 71 | # Display translations in columns 72 | with col1: 73 | for lang in list(languages.keys())[:4]: 74 | with st.expander(f"🔤 {lang}", expanded=True): 75 | st.text_area( 76 | label="Translation", 77 | value=translations.get(lang, ""), 78 | height=150, 79 | key=f"translation_{lang}", 80 | disabled=True 81 | ) 82 | 83 | with col2: 84 | for lang in list(languages.keys())[4:]: 85 | with st.expander(f"🔤 {lang}", expanded=True): 86 | st.text_area( 87 | label="Translation", 88 | value=translations.get(lang, ""), 89 | height=150, 90 | key=f"translation_{lang}", 91 | disabled=True 92 | ) 93 | 94 | if __name__ == "__main__": 95 | main() -------------------------------------------------------------------------------- /notebooks/pages/demo-quiz-app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import json 3 | import csv 4 | import io 5 | import pandas as pd 6 | import plotly.express as px 7 | from pydantic import BaseModel, Field 8 | from typing import List 9 | from openai import OpenAI 10 | import hashlib 11 | 12 | # Add OpenAI client initialization 13 | client = OpenAI() 14 | 15 | # Add Pydantic model for quiz structure 16 | class QuizQuestion(BaseModel): 17 | question: str = Field(..., description="The question text") 18 | options: List[str] = Field(..., description="List of possible answers") 19 | correctAnswer: int = Field(..., description="Index of the correct answer (0-based)") 20 | 21 | class Quiz(BaseModel): 22 | questions: List[QuizQuestion] = Field(..., description="List of quiz questions") 23 | 24 | # Add function to generate quiz from content 25 | def generate_quiz_from_content(content: str, num_questions: int = 5) -> List[dict]: 26 | response = client.beta.chat.completions.parse( 27 | model="gpt-4o", # or your preferred model 28 | messages=[ 29 | { 30 | "role": "system", 31 | "content": f"Generate a quiz with {num_questions} multiple-choice questions based on the following content. Each question should have 4 options." 
32 | }, 33 | {"role": "user", "content": content} 34 | ], 35 | response_format=Quiz 36 | ) 37 | 38 | return response.choices[0].message.parsed.questions 39 | 40 | # Add function to generate quiz identifier 41 | def generate_quiz_id(questions): 42 | # Create a string representation of the questions 43 | quiz_str = json.dumps(questions, sort_keys=True) 44 | # Generate a hash of the quiz content 45 | return hashlib.md5(quiz_str.encode()).hexdigest() 46 | 47 | # Initialize all session state variables at the start 48 | if 'questions' not in st.session_state: 49 | st.session_state['questions'] = [] 50 | if 'quiz_started' not in st.session_state: 51 | st.session_state['quiz_started'] = False 52 | if 'current_question' not in st.session_state: 53 | st.session_state['current_question'] = 0 54 | if 'score' not in st.session_state: 55 | st.session_state['score'] = 0 56 | if 'total_questions' not in st.session_state: 57 | st.session_state['total_questions'] = 0 58 | if 'user_answers' not in st.session_state: 59 | st.session_state['user_answers'] = [] 60 | if 'current_quiz_id' not in st.session_state: 61 | st.session_state['current_quiz_id'] = None 62 | if 'score_history' not in st.session_state: 63 | st.session_state['score_history'] = {} 64 | 65 | st.title('Modern Quiz App') 66 | 67 | # Add sidebar for AI quiz generation 68 | with st.sidebar: 69 | use_ai_generation = st.checkbox("Generate Quiz with AI") 70 | 71 | # Add AI quiz generation section 72 | if use_ai_generation: 73 | st.subheader("Generate Quiz from Content") 74 | content = st.text_area("Paste your content here", height=200) 75 | num_questions = st.slider("Number of questions", min_value=3, max_value=10, value=5) 76 | 77 | if st.button("Generate Quiz"): 78 | with st.spinner("Generating quiz..."): 79 | try: 80 | generated_questions = generate_quiz_from_content(content, num_questions) 81 | # Convert to the format expected by the app 82 | quiz_json = [ 83 | { 84 | "question": q.question, 85 | "options": q.options, 86 | "correctAnswer": q.correctAnswer 87 | } for q in generated_questions 88 | ] 89 | # Generate new quiz ID and reset score history for this quiz 90 | new_quiz_id = generate_quiz_id(quiz_json) 91 | st.session_state['current_quiz_id'] = new_quiz_id 92 | st.session_state['score_history'][new_quiz_id] = [] 93 | 94 | st.session_state['questions'] = quiz_json 95 | st.session_state['total_questions'] = len(quiz_json) 96 | st.session_state['quiz_started'] = True 97 | st.session_state['current_question'] = 0 98 | st.session_state['score'] = 0 99 | st.session_state['user_answers'] = [] 100 | st.rerun() 101 | except Exception as e: 102 | st.error(f"Error generating quiz: {str(e)}") 103 | 104 | uploaded_file = st.file_uploader('Choose a quiz JSON file', type=['json']) 105 | 106 | if uploaded_file is not None and not st.session_state['quiz_started']: 107 | # Read the JSON file 108 | try: 109 | questions = json.load(uploaded_file) 110 | st.session_state['questions'] = questions 111 | st.session_state['total_questions'] = len(questions) 112 | # Generate new quiz ID and reset score history for this quiz 113 | new_quiz_id = generate_quiz_id(questions) 114 | if new_quiz_id != st.session_state['current_quiz_id']: 115 | st.session_state['current_quiz_id'] = new_quiz_id 116 | st.session_state['score_history'][new_quiz_id] = [] 117 | 118 | if st.button('Start Quiz'): 119 | st.session_state['quiz_started'] = True 120 | st.session_state['current_question'] = 0 121 | st.session_state['score'] = 0 122 | st.session_state['user_answers'] = [] 123 | except 
Exception as e: 124 | st.error('Error parsing file. Please ensure it\'s a valid JSON.') 125 | 126 | if st.session_state['quiz_started']: 127 | current_q = st.session_state['current_question'] 128 | questions = st.session_state['questions'] 129 | total_q = st.session_state['total_questions'] 130 | 131 | if current_q < total_q: 132 | question = questions[current_q] 133 | st.subheader(f"Question {current_q + 1}: {question['question']}") 134 | options = question['options'] 135 | selection_key = f"question_{current_q}_selection" 136 | selected_option = st.radio("Select an option", options, key=selection_key) 137 | 138 | if st.button('Submit Answer'): 139 | # Get the selected option from session state 140 | selected_option = st.session_state[selection_key] 141 | # Store the user's answer 142 | st.session_state['user_answers'].append(selected_option) 143 | correct_answer = options[question['correctAnswer']] 144 | if selected_option == correct_answer: 145 | st.session_state['score'] += 1 146 | st.session_state['current_question'] += 1 147 | # Clear the selection for next question 148 | del st.session_state[selection_key] 149 | st.rerun() 150 | else: 151 | # Quiz is over, show results 152 | st.subheader('Quiz Results') 153 | score = st.session_state['score'] 154 | total = st.session_state['total_questions'] 155 | 156 | # Store score in history with percentage for current quiz 157 | score_percentage = (score / total) * 100 158 | current_quiz_id = st.session_state['current_quiz_id'] 159 | st.session_state['score_history'][current_quiz_id].append({ 160 | 'attempt': len(st.session_state['score_history'][current_quiz_id]) + 1, 161 | 'score': score_percentage 162 | }) 163 | 164 | st.write(f"Score: {score} / {total} ({score_percentage:.1f}%)") 165 | 166 | # Add score history visualization for current quiz only 167 | current_history = st.session_state['score_history'][current_quiz_id] 168 | if len(current_history) > 1: # Only show if there's more than one attempt 169 | st.subheader("Your Progress") 170 | history_df = pd.DataFrame(current_history) 171 | fig = px.line(history_df, x='attempt', y='score', 172 | title='Score History', 173 | labels={'attempt': 'Attempt Number', 'score': 'Score (%)'}, 174 | markers=True) 175 | fig.update_layout(yaxis_range=[0, 100]) 176 | st.plotly_chart(fig) 177 | 178 | # Add button to start new attempt 179 | if st.button('Start New Attempt'): 180 | st.session_state['current_question'] = 0 181 | st.session_state['score'] = 0 182 | st.session_state['user_answers'] = [] 183 | st.rerun() 184 | 185 | # Show detailed feedback 186 | user_answers = st.session_state['user_answers'] 187 | for idx, question in enumerate(questions): 188 | st.write(f"**Question {idx + 1}:** {question['question']}") 189 | st.write(f"**Your answer:** {user_answers[idx]}") 190 | correct_answer = question['options'][question['correctAnswer']] 191 | st.write(f"**Correct answer:** {correct_answer}") 192 | if user_answers[idx] == correct_answer: 193 | st.success('Correct') 194 | else: 195 | st.error('Incorrect') 196 | st.markdown("---") 197 | 198 | # Prepare CSV data 199 | output = io.StringIO() 200 | writer = csv.writer(output) 201 | writer.writerow(['Question', 'Your Answer', 'Correct Answer', 'Result']) 202 | for idx, question in enumerate(questions): 203 | user_answer = user_answers[idx] 204 | correct_answer = question['options'][question['correctAnswer']] 205 | is_correct = 'Correct' if user_answer == correct_answer else 'Incorrect' 206 | writer.writerow([question['question'], user_answer, correct_answer, 
is_correct]) 207 | csv_data = output.getvalue() 208 | st.download_button(label='Download Results (CSV)', data=csv_data, file_name='quiz_results.csv', mime='text/csv') 209 | -------------------------------------------------------------------------------- /notebooks/pages/demo-summarization-app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from pypdf import PdfReader 3 | from openai import OpenAI 4 | import numpy as np 5 | import os 6 | 7 | MODEL = "gpt-4o-mini" 8 | 9 | # Add this after the imports 10 | SUMMARY_TYPES = { 11 | "Bullet Points": "Create a bullet-point summary with key points", 12 | "Structure": "Create a structured summary with sections and subsections", 13 | "Key Concepts": "Focus on main concepts and their relationships", 14 | } 15 | 16 | st.set_page_config(page_title="PDF Summarizer", layout="wide") 17 | st.title("Multi-Level PDF Summarizer") 18 | 19 | # Initialize OpenAI client 20 | @st.cache_resource 21 | def load_client(): 22 | api_key = os.environ.get("OPENAI_API_KEY") or st.sidebar.text_input("Enter your OpenAI API key", type="password") 23 | if not api_key: 24 | st.error("Please enter your OpenAI API key") 25 | st.stop() 26 | return OpenAI(api_key=api_key) 27 | 28 | client = load_client() 29 | 30 | 31 | def extract_text_from_pdf(pdf_file): 32 | pdf_reader = PdfReader(pdf_file) 33 | text = "" 34 | for page in pdf_reader.pages: 35 | text += page.extract_text() 36 | return text 37 | 38 | 39 | def generate_summaries(text, summary_type, num_levels=5): 40 | target_lengths = np.geomspace(50, len(text.split()), num=num_levels).astype(int) 41 | summaries = [] 42 | 43 | system_prompts = { 44 | "Bullet Points": "Produce a basic bullet-point summary of main ideas.\ 45 | For each section, expand with detailed bullet points highlighting specific information.\ 46 | Begin each main point with a • symbol, ensuring consistency across sections.", 47 | "Structure": "Start with a high-level structured summary, introducing the \ 48 | main sections (e.g., Introduction, Main Points, Conclusion).\ 49 | For each section, develop a detailed summary under clear headers, preserving structured consistency.", 50 | "Key Concepts": "Begin with a compressed overview of core concepts.\ 51 | For each section, elaborate on the main concepts and their relationships, \ 52 | sorted by importance within each section.\ 53 | Ensure organized conceptual clarity across levels.", 54 | } 55 | 56 | for target_length in target_lengths: 57 | prompt = f"Summarize the following text in approximately\ 58 | {target_length} words using a {summary_type.lower()} style:\n\n{text}" 59 | response = client.chat.completions.create( 60 | model=MODEL, 61 | messages=[ 62 | {"role": "system", "content": "You are a summarization engine that takes in pdf text and\ 63 | outputs a summary of the text in the following style: " + system_prompts[summary_type]}, 64 | {"role": "user", "content": prompt} 65 | ] 66 | ) 67 | summaries.append(response.choices[0].message.content) 68 | return summaries 69 | 70 | # File uploader 71 | uploaded_file = st.file_uploader("Choose a PDF file", type="pdf") 72 | 73 | if uploaded_file is not None: 74 | if 'summaries_dict' not in st.session_state: 75 | st.session_state.summaries_dict = {} 76 | 77 | with st.spinner('Extracting text from PDF...'): 78 | if 'text' not in st.session_state: 79 | st.session_state.text = extract_text_from_pdf(uploaded_file) 80 | 81 | # Add summary type selector 82 | summary_type = st.selectbox( 83 | "Select Summary 
Type", 84 | list(SUMMARY_TYPES.keys()), 85 | help="Choose the style of summary you want to generate" 86 | ) 87 | 88 | if summary_type not in st.session_state.summaries_dict: 89 | with st.spinner(f'Generating {summary_type.lower()} summaries at different levels...'): 90 | num_levels = st.sidebar.slider("Number of Summary Levels", 5, 10, 5) 91 | st.session_state.summaries_dict[summary_type] = generate_summaries( 92 | st.session_state.text, 93 | summary_type, 94 | num_levels 95 | ) 96 | 97 | summaries = st.session_state.summaries_dict[summary_type] 98 | 99 | col1, col2 = st.columns([1, 3]) 100 | 101 | with col1: 102 | st.subheader("Summary Level") 103 | level = st.slider("Adjust summary detail level", 1, len(summaries), len(summaries)//2) 104 | 105 | st.write("Summary Statistics:") 106 | original_length = len(st.session_state.text.split()) 107 | summary_length = len(summaries[level-1].split()) 108 | st.write(f"Original text: {original_length} words") 109 | st.write(f"Summary: {summary_length} words") 110 | st.write(f"Compression ratio: {summary_length/original_length:.2%}") 111 | 112 | if st.sidebar.checkbox("Show Original Text"): 113 | st.subheader("Original Text") 114 | st.write(st.session_state.text) 115 | 116 | with col2: 117 | st.subheader("Summary") 118 | summary = summaries[level-1] 119 | st.markdown(summary) 120 | st.download_button("Download Summary", summary) 121 | 122 | # Plot summary lengths 123 | if st.sidebar.checkbox("Show Summary Lengths"): 124 | lengths = [len(summary.split()) for summary in summaries] 125 | fig = go.Figure(data=go.Scatter( 126 | x=list(range(1, num_levels+1)), 127 | y=lengths, 128 | mode='lines+markers', 129 | name='Summary Length' 130 | )) 131 | fig.update_layout( 132 | title="Summary Length by Level", 133 | xaxis_title="Summary Level", 134 | yaxis_title="Word Count" 135 | ) 136 | st.plotly_chart(fig) 137 | else: 138 | st.info("Please upload a PDF file to begin.") 139 | -------------------------------------------------------------------------------- /notebooks/prompt_constrain_space_visualization.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | sns.set() 4 | import squarify 5 | 6 | # Sample data (sizes of rectangles) 7 | sizes = [500, 300, 200, 100] 8 | 9 | # Labels for the rectangles (optional) 10 | labels = ["A", "B", "C", "D"] 11 | 12 | plt.figure(figsize=(15,7)) 13 | # Plotting the treemap 14 | plt.subplot(1,3,1) 15 | squarify.plot(sizes=sizes, label=labels, alpha=0.8, color=["red", "green", "blue", "grey"]) 16 | plt.title("LLM Representation Space") 17 | plt.axis('off') # Turn off axis 18 | plt.subplot(1,3,2) 19 | plt.plot([], []) 20 | plt.xlim(0,10) 21 | plt.ylim(0,10) 22 | plt.axis("off") 23 | 24 | # Draw an arrow from point (2, 1) to point (4, 3) 25 | plt.annotate('', xy=(10, 5), xytext=(0, 5), 26 | arrowprops=dict(facecolor='black', shrink=0.05),ha="center", va="top") 27 | plt.subplot(1,3,3) 28 | sizes = [200, 600, 100, 400] 29 | 30 | # Labels for the rectangles (optional) 31 | labels = ["A", "B", "C", "D"] 32 | 33 | # Plotting the treemap 34 | squarify.plot(sizes=sizes, label=labels, alpha=0.8, color=["red", "green", "blue", "grey"]) 35 | plt.title("LLM Representation Space After a Prompt") 36 | plt.axis('off') # Turn off axis 37 | plt.show() -------------------------------------------------------------------------------- /notebooks/prompt_engineering_results2.csv: -------------------------------------------------------------------------------- 1 
| prompt,response,response_score,concept 2 | Explain this concept in simple terms,"Genetic mutations are changes in the DNA code that can happen naturally or due to outside factors like radiation or chemicals. These changes can lead to differences in traits, such as eye color or disease risk. Some mutations are harmful, some have no effect, and a few can even be beneficial. Mutations are a normal part of evolution and contribute to the diversity of life on Earth.","I would give this response a score of 90 out of 100. 3 | 4 | Here's the breakdown: 5 | 6 | **Strengths:** 7 | 1. **Accuracy:** The response correctly identifies that genetic mutations are changes in the DNA code and that they can occur naturally or due to external factors like radiation or chemicals. 8 | 2. **Comprehensiveness:** It covers the potential effects of mutations, noting that they can be harmful, neutral, or beneficial. 9 | 3. **Relevance:** The explanation ties mutations to traits such",Genetic Mutations 10 | Explain the following concept:,"Genetic mutations are changes in the DNA sequence that can lead to variations in the characteristics of an organism. These variations can occur through a variety of mechanisms, such as errors during DNA replication, exposure to certain environmental factors, or as a result of natural genetic diversity. 11 | 12 | Mutations can have different effects on an organism. Some mutations may have no noticeable impact, while others can lead to changes in physical traits, behaviors, or susceptibility to certain diseases. In some cases, mutations can be harmful, leading to","I would give this response a score of 85 out of 100. 13 | 14 | Here's a breakdown of the evaluation: 15 | 16 | **Strengths:** 17 | 1. **Accuracy:** The response correctly identifies that genetic mutations are changes in the DNA sequence and that these changes can lead to variations in an organism's characteristics. 18 | 2. **Mechanisms:** It accurately mentions that mutations can occur due to errors during DNA replication, exposure to environmental factors, or natural genetic diversity. 19 | 3. **Effects:** The response correctly notes that",Genetic Mutations 20 | Explain this:,"Genetic mutations are alterations in the DNA sequence that can occur naturally or as a result of external factors such as radiation, chemicals, or errors during DNA replication. These mutations can have a wide range of effects, from no discernible impact to causing genetic disorders or cancer. 21 | 22 | Mutations can occur in different parts of the DNA, including genes (which encode proteins), regulatory regions (which control gene expression), and non-coding regions with unknown functions. Depending on the location and nature of the mutation, it","I would give this response a score of 90 out of 100. 23 | 24 | Here's a breakdown of the evaluation: 25 | 26 | **Strengths:** 27 | 1. **Accuracy:** The response correctly identifies genetic mutations as alterations in the DNA sequence and mentions that they can occur naturally or due to external factors like radiation, chemicals, or errors during DNA replication. 28 | 2. **Range of Effects:** It accurately states that mutations can have a wide range of effects, from no impact to causing genetic disorders or cancer. 29 | 3.",Genetic Mutations 30 | Break down this concept for a beginner:,"Genetic mutations are changes in the DNA sequence that can occur naturally or be caused by external factors like radiation or chemicals. 
These mutations can lead to variations in traits and characteristics among individuals within a species. Some mutations may have no effect, while others can cause genetic disorders or provide advantages in survival and reproduction. These variations are essential for the process of evolution, as they contribute to the diversity of life on Earth.","I would give this response a score of 90 out of 100. 31 | 32 | Here's the breakdown of the score: 33 | 34 | 1. **Accuracy (30/30)**: The response accurately describes genetic mutations as changes in the DNA sequence and correctly identifies both natural occurrences and external factors like radiation and chemicals as causes. It also correctly mentions the potential effects of mutations, including no effect, genetic disorders, and advantages in survival and reproduction. 35 | 36 | 2. **Comprehensiveness (25/30)**: The response",Genetic Mutations 37 | Can you simplify the explanation of the following concept:,"Certainly! Genetic mutations are changes in the DNA sequence that can lead to differences in traits or characteristics. These changes can occur naturally or as a result of external factors, and they can have various effects, ranging from no impact to causing genetic disorders. Overall, genetic mutations are a fundamental part of evolution and the diversity of life.","I would give this response a score of 85 out of 100. 38 | 39 | Here's the breakdown: 40 | 41 | **Strengths:** 42 | 1. **Accuracy:** The response correctly identifies genetic mutations as changes in the DNA sequence. 43 | 2. **Scope:** It mentions that mutations can occur naturally or due to external factors, which is accurate. 44 | 3. **Impact:** It correctly states that mutations can have a range of effects, from no impact to causing genetic disorders. 45 | 4. **Evolutionary Role:** It acknowledges the",Genetic Mutations 46 | Explain this concept in simple terms,"Overfitting in machine learning happens when a model learns the specific details of the training data too well, to the point that it doesn't perform well with new, unseen data. It's like when a student memorizes the answers to a few specific questions for a test, but then struggles with similar questions they haven't seen before. In machine learning, overfitting can lead to poor predictions because the model is too focused on the training data and doesn't generalize well to new situations.","I would give this response a score of 90 out of 100. 47 | 48 | Here's the breakdown of the score: 49 | 50 | 1. **Clarity (25/25)**: The explanation is clear and easy to understand. It uses a relatable analogy (a student memorizing answers) to explain the concept, which helps in grasping the idea of overfitting. 51 | 52 | 2. **Accuracy (25/25)**: The response accurately describes overfitting as a situation where a model learns the training data too",Overfitting in Machine Learning 53 | Explain the following concept:,"Overfitting in machine learning occurs when a model learns to perform well on the training data but does not generalize well to new, unseen data. This happens when the model learns the specific patterns and noise in the training data, rather than the underlying relationships that would allow it to make accurate predictions on new data. 54 | 55 | Imagine a scenario where a model is trained to recognize pictures of cats. If the model memorizes the exact features of the cats in the training set, including any anomalies or imperfections, it","I would give this response a score of 85 out of 100. 
56 | 57 | Here's a breakdown of the evaluation: 58 | 59 | **Strengths:** 60 | 1. **Clear Definition:** The response accurately defines overfitting as a situation where a model performs well on training data but fails to generalize to new, unseen data. 61 | 2. **Explanation of Cause:** It correctly identifies that overfitting occurs when the model learns specific patterns and noise in the training data rather than the underlying relationships. 62 | 3. **Illustr",Overfitting in Machine Learning 63 | Explain this:,"Overfitting in machine learning occurs when a model learns to perform exceptionally well on the training data, but it fails to generalize to new, unseen data. This means that the model has learned to capture the noise and idiosyncrasies present in the training data, rather than the underlying patterns that are truly indicative of the target variable. 64 | 65 | Imagine a scenario where a model is trained to predict housing prices based on a set of features such as square footage, number of bedrooms, and location. If the","I would give this response a score of 85 out of 100. 66 | 67 | Here's a breakdown of the evaluation: 68 | 69 | **Strengths:** 70 | 1. **Clear Definition:** The response accurately defines overfitting, explaining that it occurs when a model performs well on training data but fails to generalize to new, unseen data. 71 | 2. **Explanation of Cause:** It correctly identifies that overfitting happens because the model captures noise and idiosyncrasies in the training data rather than the underlying",Overfitting in Machine Learning 72 | Break down this concept for a beginner:,"Overfitting in machine learning occurs when a model learns the training data too well, to the point that it starts to perform poorly on new, unseen data. It's like studying only specific questions for an exam and then finding that the actual exam contains completely different questions. 73 | 74 | An analogy is fitting a very complex puzzle together perfectly, but then finding out that some pieces were forced into the wrong places. The model becomes too specialized in the training data and is unable to generalize well to new situations. 75 | 76 | To avoid","I would give this response a score of 90 out of 100. 77 | 78 | Here's the breakdown of the evaluation: 79 | 80 | 1. **Accuracy (30/30)**: The explanation accurately describes overfitting as a situation where a model performs well on training data but poorly on new, unseen data. This is a correct and fundamental aspect of overfitting. 81 | 82 | 2. **Clarity (25/25)**: The response is clear and easy to understand. The analogy of studying specific questions for an exam",Overfitting in Machine Learning 83 | Can you simplify the explanation of the following concept:,"Of course! Overfitting in machine learning occurs when a model learns the details and noise in the training data to the extent that it negatively impacts its ability to generalize to new, unseen data. Essentially, the model becomes too tailored to the training data and performs poorly when exposed to new data. This can happen when the model is too complex or when the training data is limited. To combat overfitting, techniques such as cross-validation, regularization, and using more data can be employed to help the","I would give this response a score of 85 out of 100. 84 | 85 | Here's a breakdown of the evaluation: 86 | 87 | **Strengths:** 88 | 1. 
**Clear Definition:** The response accurately defines overfitting and explains that it occurs when a model learns the details and noise in the training data to the extent that it negatively impacts its ability to generalize to new, unseen data. 89 | 2. **Explanation of Causes:** It correctly identifies that overfitting can happen when the model is too complex or when",Overfitting in Machine Learning 90 |
--------------------------------------------------------------------------------
/notebooks/qa_pdf.py:
--------------------------------------------------------------------------------
import streamlit as st
import PyPDF2
from io import BytesIO
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document

# Chroma needs a newer sqlite3 than some systems ship; swap in the pysqlite3
# binary before the vector store imports the standard-library module.
__import__("pysqlite3")
import sys
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

def setup_pdf(file):
    """Read a PDF file and return its pages as LangChain documents."""
    reader = PyPDF2.PdfReader(BytesIO(file.read()))
    docs = []
    for page in reader.pages:
        docs.append(Document(page_content=page.extract_text() or ""))
    return docs


def setup_qa_chain(pdf_doc, openai_api_key):
    if openai_api_key != "":
        os.environ["OPENAI_API_KEY"] = openai_api_key
    elif not os.environ.get("OPENAI_API_KEY"):
        st.error("Please enter your OpenAI API key")
        return None
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
    persist_directory = './persist_directory/'
    embedding = OpenAIEmbeddings()
    all_splits = text_splitter.split_documents(pdf_doc)
    vectordb = Chroma.from_documents(all_splits, embedding=embedding,
                                     persist_directory=persist_directory)
    vectordb.persist()
    # ConversationalRetrievalChain supersedes the deprecated ChatVectorDBChain
    pdf_qa = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0, model_name="gpt-4"),
        vectordb.as_retriever())
    return pdf_qa


def main():
    st.header("Q&A with PDF 💬")
    openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password")
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

    if uploaded_file is not None:
        pdf_docs = setup_pdf(uploaded_file)
        pdf_qa = setup_qa_chain(pdf_docs, openai_api_key)
        if pdf_qa is None:
            return
        st.write("Pdf Loaded!")

        # Accept user questions/query
        query = st.text_input("Ask questions about your PDF file:")
        st.write(f"Question: {query}")
        if query:
            result = pdf_qa({"question": query, "chat_history": []})
            answer = result["answer"]
            st.write(f"Answer: {answer}")

if __name__ == '__main__':
    main()
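
qa_pdf.py passes an empty chat_history on every question, so follow-ups cannot refer back to earlier answers. A minimal sketch of multi-turn use with the same chain, assuming pdf_qa was built as in setup_qa_chain above; the questions are illustrative:

# Sketch: thread accumulated (question, answer) tuples through the chain so
# follow-up questions keep their context.
chat_history = []
for question in ["What is this paper about?", "Summarize its main result in one sentence."]:
    result = pdf_qa({"question": question, "chat_history": chat_history})
    chat_history.append((question, result["answer"]))
    print(result["answer"])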
Field(description="The quiz answers for each question as True or False only.") 13 | 14 | 15 | class QuizMultipleChoice(BaseModel): 16 | quiz_text: str = Field(description="The quiz text") 17 | questions: List[str] = Field(description="The quiz questions") 18 | alternatives: List[List[str]] = Field(description="The quiz alternatives for each question as a list of lists") 19 | answers: List[str] = Field(description="The quiz answers") 20 | 21 | class QuizOpenEnded(BaseModel): 22 | questions: List[str] = Field(description="The quiz questions") 23 | answers: List[str] = Field(description="The quiz answers") 24 | 25 | 26 | def create_quiz_chain(prompt_template,llm, pydantic_object_schema): 27 | """Creates the chain for the quiz app.""" 28 | return prompt_template | llm.with_structured_output(pydantic_object_schema) 29 | 30 | def split_questions_answers(quiz_response): 31 | """Function that splits the questions and answers from the quiz response.""" 32 | questions = quiz_response.questions # this will be a list of questions 33 | answers = quiz_response.answers # this will be a list of answers 34 | return questions, answers 35 | 36 | def main(): 37 | st.title("Quiz App") 38 | st.write("This app generates a quiz based on a given context.") 39 | 40 | # Initialize session state variables if they don't already exist 41 | if 'questions' not in st.session_state: 42 | st.session_state.questions = [] 43 | if 'answers' not in st.session_state: 44 | st.session_state.answers = [] 45 | if 'user_answers' not in st.session_state: 46 | st.session_state.user_answers = [] 47 | 48 | openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password") 49 | if openai_api_key != "": 50 | os.environ["OPENAI_API_KEY"] = openai_api_key 51 | else: 52 | st.error("Please enter your OpenAI API key") 53 | llm = ChatOpenAI(model="gpt-4o-mini",temperature=0.0) 54 | 55 | context = st.text_area("Enter the concept/context for the quiz", value=st.session_state.get('context', '')) 56 | num_questions = st.number_input("Enter the number of questions", min_value=1, max_value=10, value=st.session_state.get('num_questions', 3)) 57 | quiz_type = st.selectbox("Select the quiz type", ["multiple-choice", "true-false", "open-ended"], index=st.session_state.get('quiz_type_index', 0)) 58 | if quiz_type == "multiple-choice": 59 | prompt_template = create_multiple_choice_template() 60 | pydantic_object_schema = QuizMultipleChoice 61 | elif quiz_type == "true-false": 62 | prompt_template = create_true_false_template() 63 | pydantic_object_schema = QuizTrueFalse 64 | elif quiz_type == "open-ended": 65 | prompt_template = create_open_ended_template() 66 | pydantic_object_schema = QuizOpenEnded 67 | 68 | if st.button("Generate Quiz"): 69 | chain = create_quiz_chain(prompt_template, llm, pydantic_object_schema) 70 | quiz_response = chain.invoke({"num_questions": num_questions, "quiz_context": context}) 71 | st.write(quiz_response) 72 | st.session_state.questions = quiz_response.questions 73 | st.session_state.answers = quiz_response.answers 74 | if quiz_type == "multiple-choice": 75 | st.session_state.alternatives = quiz_response.alternatives 76 | st.session_state.user_answers = [None] * len(quiz_response.questions) 77 | st.session_state.context = context 78 | st.session_state.num_questions = num_questions 79 | st.session_state.quiz_type_index = ["multiple-choice", "true-false", "open-ended"].index(quiz_type) 80 | 81 | if st.session_state.questions: 82 | with st.form(key='quiz_form'): 83 | display_questions(quiz_type) 84 | submitted = 
--------------------------------------------------------------------------------
/notebooks/quiz_templates.py:
--------------------------------------------------------------------------------
from langchain_core.prompts import ChatPromptTemplate

def create_multiple_choice_template():
    prompt = ChatPromptTemplate.from_messages([
        ('system', 'You are a quiz engine that generates\
            multiple-choice questions with answers according\
            to user input specifications.'),
        ('human', "Create a quiz with {num_questions} questions about the following context: {quiz_context}")
    ])

    return prompt

def create_true_false_template():
    prompt = ChatPromptTemplate.from_messages([
        ('system', 'You are a quiz engine that generates\
            true-false questions with answers according\
            to user input specifications.'),
        ('human', "Create a quiz with {num_questions} questions about the following context: {quiz_context}")
    ])

    return prompt

def create_open_ended_template():
    prompt = ChatPromptTemplate.from_messages([
        ('system', 'You are a quiz engine that generates\
            open-ended questions with answers according\
            to user input specifications.'),
        ('human', "Create a quiz with {num_questions} questions about the following context: {quiz_context}")
    ])

    return prompt
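
Each template above pairs with its matching Pydantic schema through create_quiz_chain in quiz_app.py. A minimal headless sketch of that composition, assuming OPENAI_API_KEY is set and quiz_templates.py is importable; the schema mirrors QuizMultipleChoice from quiz_app.py and the context string is just an example:

# Sketch: exercise the multiple-choice quiz chain outside Streamlit.
from typing import List

from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

from quiz_templates import create_multiple_choice_template


class QuizMultipleChoice(BaseModel):  # mirrors the schema defined in quiz_app.py
    quiz_text: str = Field(description="The quiz text")
    questions: List[str] = Field(description="The quiz questions")
    alternatives: List[List[str]] = Field(description="The quiz alternatives for each question as a list of lists")
    answers: List[str] = Field(description="The quiz answers")


llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
chain = create_multiple_choice_template() | llm.with_structured_output(QuizMultipleChoice)
quiz = chain.invoke({"num_questions": 2, "quiz_context": "the water cycle"})
for question, options, answer in zip(quiz.questions, quiz.alternatives, quiz.answers):
    print(question, options, "->", answer)

The same pattern covers the other two quiz types by swapping the template function and the schema.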
-------------------------------------------------------------------------------- /notebooks/semantic-search-demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "execution": { 8 | "iopub.execute_input": "2025-01-08T03:30:15.210098Z", 9 | "iopub.status.busy": "2025-01-08T03:30:15.209656Z", 10 | "iopub.status.idle": "2025-01-08T03:30:15.416294Z", 11 | "shell.execute_reply": "2025-01-08T03:30:15.416018Z" 12 | } 13 | }, 14 | "outputs": [], 15 | "source": [ 16 | "from openai import OpenAI\n", 17 | "client = OpenAI()\n", 18 | "\n", 19 | "def semantic_search(query, pdf_text):\n", 20 | " response = client.chat.completions.create(\n", 21 | " model=\"gpt-4o-mini\",\n", 22 | " messages=[{\"role\": \"system\", \"content\": f\"You answer \\\n", 23 | " semantic search questions ONLY ABOUT THE FOLLOWING TEXT: \\\n", 24 | " {pdf_text}\"},\n", 25 | " {\"role\": \"user\", \"content\": query}]\n", 26 | " )\n", 27 | " \n", 28 | " return response.choices[0].message.content\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": { 35 | "execution": { 36 | "iopub.execute_input": "2025-01-08T03:30:15.417706Z", 37 | "iopub.status.busy": "2025-01-08T03:30:15.417636Z", 38 | "iopub.status.idle": "2025-01-08T03:30:21.352924Z", 39 | "shell.execute_reply": "2025-01-08T03:30:21.352132Z" 40 | } 41 | }, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "'An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is computed as a weighted sum of the values, where the weight assigned to each value is computed by a compatibility function of the query with the corresponding key. \\n\\nIn the context of the Transformer model, there are two primary forms of attention used:\\n\\n1. **Scaled Dot-Product Attention**: This involves computing the dot products of the query with all keys, dividing each by the square root of the key dimension, and applying a softmax function to obtain the weights on the values. The formula for this attention is:\\n \\\\[\\n Attention(Q, K, V) = softmax\\\\left(\\\\frac{QK^T}{\\\\sqrt{d_k}}\\\\right)V\\n \\\\]\\n\\n2. **Multi-Head Attention**: Instead of performing a single attention function, the model linearly projects the queries, keys, and values multiple times (h times with different learned linear projections) and performs the attention function in parallel. This allows the model to jointly attend to information from different representation subspaces at different positions. 
The formula for multi-head attention is:\\n \\\\[\\n MultiHead(Q, K, V) = Concat(head_1, ..., head_h)W_O\\n \\\\]\\n where each head is computed as:\\n \\\\[\\n head_i = Attention(QW_Q^i, KW_K^i, V W_V^i)\\n \\\\]\\n\\nThese attention mechanisms allow the model to capture dependencies between different input and output positions, enhancing its ability to model complex sequences.'" 47 | ] 48 | }, 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "output_type": "execute_result" 52 | } 53 | ], 54 | "source": [ 55 | "from pypdf import PdfReader\n", 56 | "\n", 57 | "def load_pdf_text(file_path):\n", 58 | " '''Loads text from a PDF file.'''\n", 59 | " # creating a pdf reader object\n", 60 | " reader = PdfReader(file_path)\n", 61 | "\n", 62 | " # extracting text from page\n", 63 | " text = \"\\n\\n\".join([page.extract_text() for page in reader.pages])\n", 64 | " \n", 65 | " return text\n", 66 | "\n", 67 | "\n", 68 | "pdf_path = \"./assets-resources/attention-paper.pdf\"\n", 69 | "pdf_text = load_pdf_text(pdf_path)\n", 70 | "\n", 71 | "query = \"What is the attention mechanism?\"\n", 72 | "semantic_search(query, pdf_text)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 3, 78 | "metadata": { 79 | "execution": { 80 | "iopub.execute_input": "2025-01-08T03:30:21.390984Z", 81 | "iopub.status.busy": "2025-01-08T03:30:21.390733Z", 82 | "iopub.status.idle": "2025-01-08T03:30:23.959176Z", 83 | "shell.execute_reply": "2025-01-08T03:30:23.958000Z" 84 | } 85 | }, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "SemanticSearchResponse(answer='An attention mechanism is a technique in neural networks that enables the model to focus on specific parts of the input sequence when producing each element of the output sequence. It works by mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is computed as a weighted sum of the values, with the weights determined by a compatibility function that measures how well each key matches the query. 
In the case of the Transformer model, a specific type called \"Scaled Dot-Product Attention\" is employed, where the compatibility is calculated by taking the dot product of the query and keys, scaling by the square root of the dimensionality of the keys, and applying a softmax function to obtain the weights on the values.', quote_sources=['Section 3.2 Attention', 'Section 3.2.1 Scaled Dot-Product Attention'])" 91 | ] 92 | }, 93 | "execution_count": 3, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "from pydantic import BaseModel, Field\n", 100 | "from typing import List\n", 101 | "class SemanticSearchResponse(BaseModel):\n", 102 | " answer: str = Field(..., description=\"The answer to the semantic search question\")\n", 103 | " quote_sources: List[str] = Field(..., description=\"The source of the quote\")\n", 104 | "\n", 105 | "def structured_semantic_search(query, pdf_text):\n", 106 | " response = client.beta.chat.completions.parse(\n", 107 | " model=\"gpt-4o-mini\",\n", 108 | " messages=[{\"role\": \"system\", \"content\": f\"You answer \\\n", 109 | " semantic search questions ONLY ABOUT THE FOLLOWING TEXT: \\\n", 110 | " {pdf_text}\"},\n", 111 | " {\"role\": \"user\", \"content\": query}],\n", 112 | " response_format=SemanticSearchResponse\n", 113 | " )\n", 114 | " return response.choices[0].message.parsed\n", 115 | "\n", 116 | "query = \"What is the attention mechanism?\"\n", 117 | "output = structured_semantic_search(query, pdf_text)\n", 118 | "output" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 4, 124 | "metadata": { 125 | "execution": { 126 | "iopub.execute_input": "2025-01-08T03:30:23.962443Z", 127 | "iopub.status.busy": "2025-01-08T03:30:23.962258Z", 128 | "iopub.status.idle": "2025-01-08T03:30:23.965635Z", 129 | "shell.execute_reply": "2025-01-08T03:30:23.965020Z" 130 | } 131 | }, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "['Section 3.2 Attention', 'Section 3.2.1 Scaled Dot-Product Attention']" 137 | ] 138 | }, 139 | "execution_count": 4, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "output.answer\n", 146 | "output.quote_sources" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [] 155 | } 156 | ], 157 | "metadata": { 158 | "kernelspec": { 159 | "display_name": "oreilly-chatgpt-apps", 160 | "language": "python", 161 | "name": "oreilly-chatgpt-apps" 162 | }, 163 | "language_info": { 164 | "codemirror_mode": { 165 | "name": "ipython", 166 | "version": 3 167 | }, 168 | "file_extension": ".py", 169 | "mimetype": "text/x-python", 170 | "name": "python", 171 | "nbconvert_exporter": "python", 172 | "pygments_lexer": "ipython3", 173 | "version": "3.11.10" 174 | } 175 | }, 176 | "nbformat": 4, 177 | "nbformat_minor": 2 178 | } 179 | -------------------------------------------------------------------------------- /notebooks/summary.txt: -------------------------------------------------------------------------------- 1 | - Kickoff meeting held for new project, CodeName X 2 | - Goal is to develop a new AI model to improve product capabilities 3 | - Estimated timeline for project is 18 months, subject to refinement 4 | - Resources include robust development team and state-of-the-art AI training facilities 5 | - Possibility of bringing in external consultants for specific tasks approved 6 | - Proposed two-stage process for AI model 
development: exploratory phase followed by focused development phase 7 | - Importance of maintaining good communication highlighted, Agile methodology agreed upon 8 | - Next steps: project brief to be sent out, potential consultants to be explored, sandbox set up for development team, project planning meeting scheduled for next week. -------------------------------------------------------------------------------- /notebooks/superheroes.csv: -------------------------------------------------------------------------------- 1 | Superhero Name,Superpower,Power Level,Catchphrase 2 | Captain Thunder,Bolt Manipulation,90,"Feel the power of the storm!" 3 | Silver Falcon,Flight and Agility,85,"Soar high, fearlessly!" 4 | Mystic Shadow,Invisibility and Illusions,78,"Disappear into the darkness!" 5 | Blaze Runner,Pyrokinesis,88,"Burn bright and fierce!" 6 | Electra-Wave,Electric Manipulation,82,"Unleash the electric waves!" 7 | Crimson Cyclone,Super Speed,91,"Blazing fast and unstoppable!" 8 | Aqua Fury,Hydrokinesis,80,"Ride the waves of power!" 9 | Lunar Guardian,Lunar Manipulation,77,"Embrace the moon's might!" 10 | Steel Titan,Super Strength and Durability,95,"Indestructible force of nature!" 11 | Nightblade,Night Vision and Stealth,84,"Strike from the shadows!" 12 | Frostbite,Ice Manipulation,87,"Chill your bones!" 13 | Starburst,Energy Projection,83,"Ignite the cosmos!" 14 | Sonic Dash,Sound Manipulation,86,"Hear the sound of victory!" 15 | Nova Surge,Energy Absorption and Redirection,89,"Harness the energy within!" 16 | Shadowcat,Intangibility,76,"Phase through the impossible!" 17 | Neon Spark,Light Manipulation,79,"Glow and dazzle!" 18 | Phoenix Flame,Resurrection and Healing,94,"Rise from the ashes!" 19 | Crystal Guardian,Crystallokinesis,81,"Shatter your illusions!" 20 | Earthshaker,Geokinesis,92,"Feel the earth tremble beneath you!" 21 | Silver Seraph,Telekinesis,85,"Move objects with your mind!" 22 | Stormbringer,Weather Manipulation,93,"Unleash the storm's fury!" 23 | Scarlet Siren,Mind Control,88,"Obey my commands!" 24 | Rift Walker,Dimensional Travel,90,"Step between worlds!" 25 | Chrono-Wizard,Time Manipulation,91,"Master of time's flow!" 26 | Blazing Comet,Fireball Projection,82,"Burn brighter than a comet!" 27 | Phantom Wisp,Invisibility,75,"Disappear like a wisp in the wind!" 28 | Luminous Knight,Luminokinesis,78,"Illuminate the darkest night!" 29 | Gravity Shift,Gravity Manipulation,89,"Bend gravity to your will!" 30 | Solar Empress,Solar Energy Absorption,87,"Harness the power of the sun!" 31 | Twilight Specter,Shadow Manipulation,80,"Dance with shadows!" 32 | Thunderstrike,Lightning Control,91,"Electrify the battlefield!" 33 | Nebula Weaver,Reality Warping,96,"Shape the fabric of reality!" 34 | Frostglide,Cryo-Teleportation,85,"Freeze and glide through space!" 35 | Zenith Blaze,Heat Vision,83,"Gaze into the flames of justice!" 36 | Astral Drifter,Astral Projection,79,"Roam the astral plane!" 37 | Blade Dancer,Swordsmanship and Agility,88,"Dance with deadly grace!" 38 | Azure Tempest,Water Manipulation,82,"Unleash the tempest's rage!" 39 | Ghost Sentinel,Intangibility and Invisibility,76,"Haunt your nightmares!" 40 | Ember Fox,Pyrokinetic Fox Shapeshifter,89,"Outfox your enemies with fire!" 41 | Psy-Bender,Telepathy,84,"Read minds like an open book!" 42 | Celestial Sphinx,Cosmic Awareness,93,"Unravel the secrets of the universe!" 43 | Dragonfist,Dragon Summoning and Martial Arts,92,"Unleash the dragon's fury!" 44 | Solar Flare,Solar Energy Projection,85,"Feel the burning light!" 
45 | Night Lotus,Darkness Manipulation,78,"Bloom in the shadows!" 46 | Quantum Strider,Quantum Manipulation,90,"Walk the edge of reality!" 47 | Ironclad,Invulnerability and Enhanced Strength,95,"Invincible and mighty!" 48 | Shadow Stalker,Shadow Shifting,81,"Disappear and reappear at will!" 49 | Aqua Archer,Water Arrow Projection,80,"Shoot through water's flow!" 50 | Crystal Gazer,Crystal Ball Scrying,77,"See what the future holds!" 51 | -------------------------------------------------------------------------------- /presentation-slides/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/presentation-slides/.DS_Store -------------------------------------------------------------------------------- /presentation-slides/chatgpt-api-presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EnkrateiaLucca/oreilly_live_training_llm_apps/e290197a8531c54e0948e51ade96555ec01edb0e/presentation-slides/chatgpt-api-presentation.pdf -------------------------------------------------------------------------------- /requirements/requirements.in: -------------------------------------------------------------------------------- 1 | ipykernel 2 | openai 3 | langchain>=0.3.0 4 | langsmith 5 | langchain-cli 6 | langserve 7 | langchain-openai 8 | langchain-ollama 9 | langchain-community 10 | langchain-pinecone 11 | langchainhub 12 | langchain-chroma 13 | pydantic 14 | instructor 15 | pypdf 16 | chromadb 17 | arxiv 18 | xmltodict 19 | duckduckgo-search 20 | playwright 21 | matplotlib 22 | streamlit 23 | python-dotenv 24 | unstructured 25 | pysqlite3 26 | wget 27 | plotly 28 | 29 | --------------------------------------------------------------------------------