├── deploy-github.sh ├── .gitignore ├── docs ├── index.md ├── en │ ├── CHAPTER-2 │ │ ├── 2.8 Takeaways and Reflections.md │ │ ├── 2.1 Introduction.md │ │ ├── Answers 2.5.md │ │ ├── Answers 2.6.md │ │ ├── 2.4 The Power of Embeddings.md │ │ └── 2.6 RAG — Techniques for QA.md │ ├── CHAPTER-1 │ │ ├── Answers 1.2.md │ │ ├── 1.7 Takeaways and Reflections.md │ │ ├── Answers 1.4.md │ │ ├── Answers 1.6.md │ │ ├── Answers 1.3.md │ │ ├── 1.4 Advanced Machine Reasoning.md │ │ ├── Answers 1.1.md │ │ ├── 1.2 Classification.md │ │ ├── 1.6 Building and Evaluating LLM Applications.md │ │ └── Answers 1.5.md │ ├── index.md │ └── CHAPTER-3 │ │ ├── 3.4 Takeaways and Reflections.md │ │ ├── 3.4 Conclusions and Reflections.md │ │ └── 3.1 Introduction.md ├── ru │ ├── CHAPTER-2 │ │ ├── 2.8 Итоги и размышления.md │ │ ├── 2.1 Введение.md │ │ └── Ответы 2.6.md │ ├── index.md │ ├── CHAPTER-1 │ │ ├── Ответы 1.2.md │ │ ├── 1.7 Итоги и размышления.md │ │ ├── Ответы 1.6.md │ │ ├── Ответы 1.4.md │ │ ├── Ответы 1.3.md │ │ ├── 1.4 Продвинутое машинное рассуждение.md │ │ ├── Ответы 1.1.md │ │ ├── 1.2 Классификация.md │ │ └── 1.6 Построение и оценка LLM-приложений.md │ └── CHAPTER-3 │ │ ├── 3.4 Итоги и размышления.md │ │ └── 3.1 Введение.md ├── CHAPTER-1 │ ├── Answers 1.2.md │ ├── Answers 1.6.md │ ├── 1.7 Summary and Reflections.md │ └── Answers 1.3.md ├── CHAPTER-2 │ ├── 2.8 Summary and Reflections.md │ ├── 2.1 Introduction.md │ └── Answers 2.6.md └── CHAPTER-3 │ └── 3.4 Summary and Reflections.md ├── mkdocs.yml ├── deploy.sh ├── LICENSE ├── .github └── workflows │ └── deploy.yml ├── DEPLOYMENT.md ├── README.md ├── mkdocs-ru.yml └── mkdocs-en.yml /deploy-github.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "🚀 Deploying to GitHub Pages..." 4 | 5 | # Build all versions 6 | ./deploy.sh 7 | 8 | # Deploy to GitHub Pages using subtree 9 | echo "📤 Pushing to gh-pages branch..." 10 | git add -A 11 | git commit -m "Deploy multilingual documentation $(date)" 12 | 13 | # Push the site directory to gh-pages branch 14 | git subtree push --prefix site origin gh-pages 15 | 16 | echo "✅ Deployment complete!" 17 | echo "🌐 Your site will be available at: https://boramorka.github.io/LLM-book/" 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # MkDocs build directory 2 | site/ 3 | 4 | # Python 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | *.so 9 | .Python 10 | env/ 11 | venv/ 12 | .venv 13 | pip-log.txt 14 | pip-delete-this-directory.txt 15 | 16 | # Operating System 17 | .DS_Store 18 | .DS_Store? 19 | ._* 20 | .Spotlight-V100 21 | .Trashes 22 | ehthumbs.db 23 | Thumbs.db 24 | 25 | # Development logs and temp files 26 | *.log 27 | .tmp/ 28 | .temp/ 29 | 30 | # IDE 31 | .vscode/ 32 | .idea/ 33 | *.swp 34 | *.swo 35 | 36 | # Local development scripts 37 | serve-*.sh 38 | *_server.log 39 | 40 | # Personal notes and prompts 41 | prompt*.txt 42 | notes*.md 43 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | LLMOps. 
Make AI Work For You is a concise, hands-on guide by Nikita Goryachev for data scientists, ML engineers, and software developers to build real products with Large Language Models: mastering the OpenAI API, creating conversational chatbots with LangChain, and applying LLMOps to ship, monitor, and improve LLM applications responsibly. Start reading: English edition (en) or Russian edition (ru): [English](en/index.md) · [Русский](ru/index.md). 2 | 3 | Эта книга — практический гид по LLM от Никиты Горячева для DS/ML‑инженеров и разработчиков: основы OpenAI API, разговорные чат‑боты на LangChain и LLMOps для деплоя, мониторинга и улучшения LLM‑приложений. Начните чтение на удобном языке: [Английская версия](en/index.md) · [Русская версия](ru/index.md). 4 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: LLMOps. Make AI Work For You. 2 | repo_url: https://github.com/boramorka/LLM-book 3 | repo_name: boramorka/LLM-book 4 | 5 | theme: 6 | name: material 7 | features: 8 | - navigation.sections 9 | - toc.integrate 10 | - navigation.top 11 | - search.suggest 12 | - search.highlight 13 | - content.tabs.link 14 | - content.code.annotation 15 | - content.code.copy 16 | palette: 17 | primary: black 18 | language: en 19 | 20 | markdown_extensions: 21 | - meta 22 | - pymdownx.highlight 23 | - pymdownx.superfences 24 | - pymdownx.tasklist: 25 | custom_checkbox: true 26 | 27 | plugins: 28 | - search: 29 | lang: 30 | - en 31 | - ru 32 | 33 | extra: 34 | alternate: 35 | - name: English 36 | link: en/ 37 | lang: en 38 | - name: Русский 39 | link: ru/ 40 | lang: ru 41 | 42 | nav: 43 | - Home: index.md 44 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "🚀 Building multilingual site for GitHub Pages deployment..." 4 | 5 | # Clean previous builds 6 | rm -rf site 7 | 8 | # Build main landing page 9 | echo "📄 Building main landing page..." 10 | mkdocs build 11 | 12 | # Build English version in subdirectory 13 | echo "🇺🇸 Building English version..." 14 | mkdocs build -f mkdocs-en.yml 15 | 16 | # Build Russian version in subdirectory 17 | echo "🇷🇺 Building Russian version..." 18 | mkdocs build -f mkdocs-ru.yml 19 | 20 | # Verify structure 21 | echo "📁 Site structure:" 22 | find site -type f -name "index.html" | head -10 23 | 24 | echo "✅ Build complete! 
Site ready in ./site/" 25 | echo "" 26 | echo "🌐 Local testing URLs:" 27 | echo " Main: file://$(pwd)/site/index.html" 28 | echo " English: file://$(pwd)/site/en/index.html" 29 | echo " Russian: file://$(pwd)/site/ru/index.html" 30 | echo "" 31 | echo "📤 To deploy to GitHub Pages:" 32 | echo " git add site/" 33 | echo " git commit -m 'Deploy multilingual site'" 34 | echo " git subtree push --prefix site origin gh-pages" 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Nikita Goryachev 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-2/2.8 Takeaways and Reflections.md: -------------------------------------------------------------------------------- 1 | # 2.8 Takeaways and Reflections 2 | 3 | Building conversational chatbots with LangChain leads to interfaces that understand natural language and hold meaningful dialogues. We walked through environment setup, document loading and indexing, and retrieving relevant fragments — and saw how LangChain bridges LLMs and your data, making integration and accessibility foundational. The key advance in modern conversational AI is dialogue context and memory: instead of disconnected replies, there is a conversational “thread” that the bot maintains via memory and retrieval chains. Technical depth (loading, retrieval, chains) goes hand in hand with UX: the examples show how complexity becomes a clear, useful experience, and memory makes interactions more natural and human. The evolution isn’t only about code — it’s about rethinking how we interact with technology. Combining advanced retrieval, contextual understanding, and memory points toward intelligent, genuinely useful systems. Ahead lie more intuitive, responsive, and “human” scenarios; we have the blueprint for conversational systems, and progress will come from engineering boldness joined with attention to human needs. 
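As a compact illustration of the pattern recapped above (a retriever over indexed documents plus conversation memory), here is a minimal sketch in Python. It is not the chapter's full implementation: the model name, the `docs/chroma` persistence path, and the sample questions are assumptions, and import paths can differ between LangChain versions.

```python
# Minimal sketch: retrieval-augmented chat with dialogue memory (illustrative only).
# Assumes OPENAI_API_KEY is set and a Chroma index was already built at docs/chroma.
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Reuse the vector store produced by the loading and indexing steps.
vectordb = Chroma(persist_directory="docs/chroma", embedding_function=OpenAIEmbeddings())

# Buffer memory preserves the conversational "thread" between turns.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

qa = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
    retriever=vectordb.as_retriever(),
    memory=memory,
)

# A follow-up question is answered in the context of the previous turn.
print(qa({"question": "What topics does the indexed document cover?"})["answer"])
print(qa({"question": "Summarize the second topic in one sentence."})["answer"])
```

With `return_messages=True`, the memory hands prior turns back to the chain as message objects, which is the form chat models expect.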
4 | 5 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-2/2.8 Итоги и размышления.md: -------------------------------------------------------------------------------- 1 | # 2.8 Итоги и размышления 2 | 3 | Создание разговорных чат‑ботов на LangChain — это путь к интерфейсам, которые понимают естественный язык и умеют вести диалог по существу. Мы прошли по цепочке шагов — настройка окружения, загрузка и индексация документов, извлечение релевантных фрагментов — и увидели, как LangChain выступает мостом между LLM и вашими данными, делая интеграцию и доступность базовыми принципами. Главное улучшение современного диалогового ИИ — учёт контекста и памяти беседы: вместо разрозненных реплик появляется «нить» разговора, которую бот удерживает с помощью механизмов памяти и retrieval‑цепочек. Техническая глубина (загрузка, извлечение, цепочки) идёт рука об руку с UX: примеры показывают, как сложность превращается в ясный и полезный опыт, а память делает взаимодействие естественным и человечным. Эволюция тут не только про код — это переосмысление способов общения с технологиями: сочетание продвинутого извлечения, контекстного понимания и памяти ведёт к умным и по‑настоящему полезным системам. Впереди — всё более интуитивные, отзывчивые и «человечные» сценарии; у нас есть чертёж разговорных систем, и прогресс определяется союзом инженерной смелости с вниманием к потребностям людей. 4 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Multilingual Documentation 2 | 3 | on: 4 | push: 5 | branches: [ main, master ] 6 | pull_request: 7 | branches: [ main, master ] 8 | 9 | permissions: 10 | contents: read 11 | pages: write 12 | id-token: write 13 | 14 | concurrency: 15 | group: "pages" 16 | cancel-in-progress: false 17 | 18 | jobs: 19 | build: 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | with: 25 | fetch-depth: 0 26 | 27 | - name: Set up Python 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: '3.x' 31 | 32 | - name: Install dependencies 33 | run: | 34 | pip install mkdocs mkdocs-material 35 | 36 | - name: Build all language versions 37 | run: | 38 | # Build main landing page 39 | mkdocs build 40 | 41 | # Build English version 42 | mkdocs build -f mkdocs-en.yml 43 | 44 | # Build Russian version 45 | mkdocs build -f mkdocs-ru.yml 46 | 47 | - name: Setup Pages 48 | uses: actions/configure-pages@v3 49 | 50 | - name: Upload artifact 51 | uses: actions/upload-pages-artifact@v2 52 | with: 53 | path: './site' 54 | 55 | deploy: 56 | environment: 57 | name: github-pages 58 | url: ${{ steps.deployment.outputs.page_url }} 59 | runs-on: ubuntu-latest 60 | needs: build 61 | if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master' 62 | 63 | steps: 64 | - name: Deploy to GitHub Pages 65 | id: deployment 66 | uses: actions/deploy-pages@v2 67 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-1/Answers 1.2.md: -------------------------------------------------------------------------------- 1 | # Answers 1.2 2 | 3 | ## Theory 4 | 1. The key message components when working with GPT models are `role` and `content`. `role` (system/user/assistant) identifies the speaker and guides the model’s style or behavior; `content` holds the message text. 
Distinguishing roles is essential for a correct dialogue simulation and expected behavior. 5 | 2. `system` messages set instructions, context, and constraints (style, tone, rules). `user` messages are the user’s inputs (questions, instructions) that the model should answer. Clear role separation helps control the model effectively. 6 | 3. Example of `system` influence: “Reply in the style of a playful poet” — the model will follow that style. 7 | 4. Message order shapes context and influences answers: user turns are interpreted in light of earlier system instructions and the conversation history. 8 | 5. In the customer review classification example, the categories are “Positive”, “Negative”, and “Neutral”. 9 | 6. Classifying movie review sentiment is useful for aggregating viewer opinions. Possible categories: “Positive”, “Negative”, “Neutral”. 10 | 7. Classifying news topics helps manage content and recommendations. Possible categories: “Politics”, “Technology”, “Sports”, “Entertainment”. 11 | 8. Classifying customer requests speeds routing and increases satisfaction. Categories: “Billing”, “Support”, “Sales”, “General Question”. 12 | 9. In classification, the `user_message` should contain the text to be labeled; keep it clear and concise so the model has enough context for an accurate result. 13 | 10. Classifying social‑post tone helps with moderation (flagging inappropriate content) and marketing (analyzing audience engagement). Example tones: “Serious”, “Ironic”, “Inspiring”, “Irritated”. 14 | 15 | -------------------------------------------------------------------------------- /docs/ru/index.md: -------------------------------------------------------------------------------- 1 | # Обзор 2 | 3 | ## Обо мне 4 | 5 | Привет! Меня зовут Никита Горячев, я Senior AI/ML Engineer в Сбере. Моя команда внедряет SOTA‑алгоритмы в NLP и рекомендательных системах. Мы организуем отраслевые митапы, участвуем в конференциях (RecSys в Сингапуре, AI Journey в Москве) и развиваем RePlay — открытую библиотеку для рекомендаций. 6 | 7 | ## О книге 8 | 9 | Эта книга — практический гид для DS, ML‑инженеров, разработчиков и всех, кто работает с современным ИИ. Мы разбираем LLM, разговорный ИИ и интеграцию LLM в процессы разработки, с акцентом на LLMOps (MLOps для крупных языковых моделей). Цель — дать понятные инструменты и подходы, чтобы использовать потенциал ИИ на практике. 10 | 11 | Подходы и примеры — прикладные: как встраивать LLM в бизнес‑сценарии (поддержка с чат‑ботами, персонализация с рекомендациями, повышение эффективности с MLOps). Книга помогает преодолеть разрыв между сложными технологиями и практикой, показывая, как использовать ИИ для роста и ценности. 12 | 13 | ## Глава 1: Основы OpenAI API 14 | 15 | Введение в ChatGPT API: возможности, классификация и применения. Обзор продвинутой модерации, усиления машинного рассуждения, чейнинга промптов, а также построения и оценки LLM‑приложений. 16 | 17 | ## Глава 2: Разговорные чат‑боты на LangChain 18 | 19 | Практика разработки чат‑ботов на LangChain: от настройки окружения до продвинутого ретривала. Особое внимание — контексту и памяти диалога для «человечного» взаимодействия. 20 | 21 | ## Глава 3: LLMOps 22 | 23 | Структурированное руководство по интеграции LLM в дев‑воркфлоу: выбор и тюнинг моделей, деплой, мониторинг, автоматизация и лучшие практики. Практические кейсы и этические аспекты. 24 | 25 | Книга — не просто набор техник, а полноценное руководство по ответственному и инновационному использованию ИИ. 
Здесь — технические, этические и практические аспекты, а также карта для развития в стремительно меняющемся мире LLMOps. 26 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-1/Ответы 1.2.md: -------------------------------------------------------------------------------- 1 | # Ответы 1.2 2 | 3 | ## Теория 4 | 1. Ключевые компоненты сообщения при работе с моделями GPT — это `role` и `content`. `role` (system/user/assistant) определяет отправителя реплики и направляет модель на соответствующий стиль или поведение; `content` содержит текст сообщения. Разграничение ролей важно для корректной симуляции диалога и получения ожидаемой реакции. 5 | 2. Сообщения с ролью `system` задают инструкции, контекст и ограничения (стиль, тон, правила поведения). Сообщения с ролью `user` — это входные запросы пользователя (вопросы, указания), на которые модель должна ответить. Чёткое разделение ролей помогает эффективно управлять поведением модели. 6 | 3. Пример влияния `system`: «Отвечай в стиле игривого поэта» — модель будет придерживаться заданного стиля в своих ответах. 7 | 4. Последовательность сообщений формирует контекст и влияет на ответы: реплики пользователя интерпретируются с учётом предыдущих системных указаний и всей истории диалога. 8 | 5. В примере классификации отзывов клиента используются категории: «Положительный», «Отрицательный», «Нейтральный». 9 | 6. Классификация тональности рецензии на фильм полезна для агрегирования мнений зрителей. Возможные категории: «Положительная», «Отрицательная», «Нейтральная». 10 | 7. Классификация темы новости помогает управлять контентом и формировать рекомендации. Возможные категории: «Политика», «Технологии», «Спорт», «Развлечения». 11 | 8. Классификация обращений клиентов ускоряет маршрутизацию и повышает удовлетворённость. Категории: «Биллинг», «Техподдержка», «Продажи», «Общий вопрос». 12 | 9. `user_message` при классификации должен содержать классифицируемый текст; формулировка должна быть ясной и краткой, чтобы модели хватило контекста для точного вывода. 13 | 10. Классификация тона постов в соцсетях помогает в модерации (для поиска неподходящего контента) и маркетинге (для анализа вовлечённости аудитории). Примеры тонов: «Серьёзный», «Ироничный», «Вдохновляющий», «Раздражённый». 14 | -------------------------------------------------------------------------------- /docs/en/index.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | ## About Me 4 | 5 | Hi! I'm Nikita Goryachev, a Senior AI/ML Engineer at Sber. My team implements SOTA algorithms in NLP and recommendation systems. We organize industry meetups, participate in conferences (RecSys in Singapore, AI Journey in Moscow), and develop RePlay — an open-source library for recommendations. 6 | 7 | ## About the Book 8 | 9 | This book is a practical guide for data scientists, ML engineers, developers, and anyone working with modern AI. We explore LLMs, conversational AI, and LLM integration into development processes, with a focus on LLMOps (MLOps for Large Language Models). The goal is to provide clear tools and approaches to harness AI's potential in practice. 10 | 11 | The approaches and examples are practical: how to embed LLMs in business scenarios (customer support with chatbots, personalization with recommendations, efficiency improvements with MLOps). The book helps bridge the gap between complex technologies and practice, showing how to use AI for growth and value. 
12 | 13 | ## Chapter 1: OpenAI API Fundamentals 14 | 15 | Introduction to ChatGPT API: capabilities, classification, and applications. Overview of advanced moderation, enhanced machine reasoning, prompt chaining, and building and evaluating LLM applications. 16 | 17 | ## Chapter 2: Conversational Chatbots with LangChain 18 | 19 | Practical chatbot development with LangChain: from environment setup to advanced retrieval. Special attention to context and dialogue memory for "human-like" interactions. 20 | 21 | ## Chapter 3: LLMOps 22 | 23 | Structured guide to integrating LLMs into dev workflows: model selection and tuning, deployment, monitoring, automation, and best practices. Practical cases and ethical aspects. 24 | 25 | This book is not just a collection of techniques, but a comprehensive guide to responsible and innovative AI use. Here you'll find technical, ethical, and practical aspects, as well as a roadmap for development in the rapidly changing world of LLMOps. 26 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-3/3.4 Takeaways and Reflections.md: -------------------------------------------------------------------------------- 1 | # 3.4 Takeaways and Reflections 2 | 3 | We covered the path from integrating LLMs into product development and LLMOps practices to orchestrating ML workflows with Kubeflow Pipelines and implementing a practical AI‑based quiz generator — an end‑to‑end arc showing how engineering and automation turn ideas into working systems. Key LLM takeaway: use a structured approach — deliberate model selection and preparation, thoughtful deployment with observability, continuous monitoring and upkeep; automation streamlines the development/update cycle, and solid prompt management with dynamic tests and A/B experiments is critical for quality. Kubeflow Pipelines demonstrates how reproducible pipelines and automated fine‑tuning (including PEFT for PaLM 2) improve efficiency and reliability — especially with large, complex models. The quiz generator highlighted the applied side: environment setup, dataset creation, prompt engineering, and LangChain for structured prompting combine into a system that generates personalized learning quizzes and serves as a template for interactive educational tools. Overall, the material underscores the transformative potential of LLMs and ML workflows: by following LLMOps best practices, using Kubeflow for automation, and building applied scenarios, you can accelerate innovation and deliver real value. Continuous learning, adaptation to new technology, and AI ethics matter throughout; participation in the community and knowledge‑sharing help tackle challenges and seize opportunities. This chapter lays a foundation for continued innovation in AI apps and offers strategic guidance on leveraging the latest AI/ML advances for practical problems. For further study: Hugging Face Transformers, O’Reilly’s “Introducing MLOps”, Google Cloud’s MLOps fundamentals course, the Kubeflow docs and pipeline automation guides, UNESCO’s resources on AI in education, IBM’s AI ethics overview and Algorithmic Justice League initiatives, plus reviews of interactive learning and quiz platforms like Quizlet. 
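To make the quiz-generator recap concrete, the sketch below shows structured prompting with LangChain in Python. It is a hypothetical, minimal example: the template wording, the topic, and the parameters are placeholders rather than the chapter's actual dataset or prompts, and import paths vary across LangChain versions.

```python
# Minimal sketch of structured quiz prompting with LangChain (illustrative only).
# Assumes OPENAI_API_KEY is set; the template, topic, and counts are placeholders.
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

quiz_prompt = PromptTemplate(
    input_variables=["topic", "num_questions"],
    template=(
        "You are a tutor writing a short quiz.\n"
        "Create {num_questions} multiple-choice questions about {topic}.\n"
        "Number each question, give options A-D, and mark the correct one with (*)."
    ),
)

chain = LLMChain(llm=ChatOpenAI(temperature=0.3), prompt=quiz_prompt)
print(chain.run(topic="vector embeddings", num_questions=3))
```

Keeping the output format inside the template (numbered questions, lettered options, a marked answer) is what makes the generated quizzes easy to parse and reuse in an interactive learning tool.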
4 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-1/1.7 Takeaways and Reflections.md: -------------------------------------------------------------------------------- 1 | # 1.7 Takeaways and Reflections 2 | 3 | This closing section brings together key observations about large language models and their practical use. LLMs are trained on massive corpora and generate answers token by token based on the provided context; understanding tokenization helps you manage length, quality, and cost. Quality and safety are not a single mechanism but a toolbox: input filtering and moderation, clear task formulation via prompts, and careful handling of user data. Where explainability matters, advanced reasoning techniques — step‑by‑step chains and decomposition — improve transparency and allow you to verify the model’s thought process. Effective systems must be responsible as well as accurate: prioritize transparency and fairness, protect privacy, and continuously manage risk — ethics and safety matter just as much as engineering. 4 | 5 | Moving from theory to practice, real‑world cases are invaluable: they show what already works, where bottlenecks appear, how to scale solutions, and how to build user feedback loops. Best practices include regular data and check updates, input validation, logging and metrics collection, plus discussing solutions with the community and experts — all of which accelerates iteration and strengthens reliability. For further learning, we recommend resources that help you operationalize approaches quickly: the OpenAI API docs with a focus on quickstart, best practices, and safety; the Twelve‑Factor App principles for configuration and keeping secrets out of code; Panel for Python as a convenient way to build interactive interfaces and experiment with LLMs; books and articles on chatbot design and AI integration; works on “practical AI” and engineering LLM‑based products. 6 | 7 | As a parting note: progress with LLMs balances technology and responsibility. Build systems that genuinely improve processes and user experience while accounting for consequences, potential risks, and ethical norms. Learn evaluation methods, refine prompting, automate quality checks, and keep human‑centered design in view — combining these approaches is how you deliver useful, safe products. 8 | 9 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-1/Answers 1.4.md: -------------------------------------------------------------------------------- 1 | # Answers 1.4 2 | 3 | ## Theory 4 | 5 | 1. Chain of Thought (CoT) breaks problem solving into sequential steps, improving accuracy and making the decision process understandable. 6 | 2. CoT transparency lets users see the model’s logic, strengthening trust. 7 | 3. In education, CoT mimics a tutor: guiding step by step and fostering critical thinking. 8 | 4. In customer support, CoT helps unpack complex requests and arrive at precise answers step by step, reducing agent load. 9 | 5. Inner Monologue hides intermediate reasoning and shows only the result — unlike CoT, where steps are visible to the user. 10 | 6. For sensitive information, Inner Monologue reduces the chance of accidentally revealing details. 11 | 7. In “guided learning”, Inner Monologue provides hints without “spoiling” the full solution. 12 | 8. Environment prep includes loading the OpenAI key and importing required Python libraries. 13 | 9. 
`get_response_for_queries` sends prompts to the API and returns the model’s answer, encapsulating the interaction. 14 | 10. CoT prompting guides the model through steps when a direct answer is non‑obvious or requires complex logic. 15 | 11. In support, the system/user prompt structure directs reasoning for detailed product answers. 16 | 12. With Inner Monologue, you can extract only the final part of the answer to keep the interface concise and clear. 17 | 18 | ## Practice 19 | 20 | Task 1: CoT — Detailed product answer 21 | 22 | 1. Implement `detailed_product_info_cot(product_name, user_question)` that uses CoT to build a detailed, stepwise answer. 23 | 2. Steps: 24 | - Step 1: Identify the product in question. 25 | - Step 2: Collect key characteristics (type, features, benefits). 26 | - Step 3: Use the collected data to answer `user_question` clearly and logically. 27 | 28 | Task 2: Inner Monologue — Concise summary 29 | 30 | 1. Implement `concise_product_summary_inner_monologue(product_name, user_question)` that uses Inner Monologue to produce a concise answer. 31 | 2. Steps: 32 | - Internal: perform the same steps as CoT, but do not expose intermediate reasoning. 33 | - Final: return only a brief, direct answer to `user_question`. 34 | 3. Compare the outputs of both functions and explain their appropriate use cases. 35 | 36 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-2/2.1 Introduction.md: -------------------------------------------------------------------------------- 1 | # 2.1 Introduction 2 | 3 | LangChain is an open framework that connects large language models (LLMs), such as ChatGPT, to a user’s internal and personal data, enabling you to “talk” to documents and get answers from content that search engines can’t see or that was created after a model was trained. Created by Harrison Chase (co‑founder and CEO of LangChain), it is a key step toward letting organizations and individuals genuinely use their own data. The core idea is to democratize access to information and turn “raw” data into an interactive, dialog‑driven knowledge source: internal reports, research, personal notes — you can now ask about them like you would an assistant, without SQL queries or manual file search, speeding up analysis and making data work far more efficient. 4 | 5 | LangChain’s architecture is modular and built to assemble and deploy LLM applications. At its heart are prompts that set instructions and context for relevant generation; models, i.e. the LLMs that understand context and produce human‑like answers; indexes that speed up indexing and retrieval; chains — multi‑step processing pipelines where you can clean, analyze, and compose final answers; and agents — “orchestrators” that combine tools, manage data flow, and adapt behavior to specific tasks. Together these elements form a flexible platform that can be tailored to almost any data landscape and use case. 6 | 7 | Functionally, LangChain covers the full data lifecycle around LLMs. It supports loading documents from many sources and formats with configurable access and keys; offers pre‑processing — splitting texts into semantically meaningful chunks that preserve context and improve retrieval; implements semantic search via embeddings and similarity measures so that you interact with data by meaning rather than keywords; and, for conversational scenarios, provides “memory” — keeping track of prior messages and maintaining a coherent dialogue — which integrates naturally into chains. 
This combination makes LangChain a great fit for assistants, analytical tools, and enterprise bots running on private knowledge stores. 8 | 9 | To go deeper, start with the official docs and tutorials, lean on the community, and take a basic LangChain LLM‑app course — you’ll get a fast practical ramp‑up and learn to build solutions that put your internal data to work alongside LLMs. 10 | 11 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-1/1.7 Итоги и размышления.md: -------------------------------------------------------------------------------- 1 | # 1.7 Итоги и размышления 2 | 3 | Эта заключительная часть главы собирает воедино ключевые наблюдения о больших языковых моделях и об их практическом применении. LLM обучены на масштабных корпусах и генерируют ответы по токенам, опираясь на переданный контекст; понимание того, как работает токенизация, помогает управлять длиной, качеством и стоимостью вывода. Качество и безопасность — это не один механизм, а совокупность приёмов: фильтрация и модерация пользовательского ввода, корректная постановка задач через промпты и аккуратная работа с пользовательскими данными. Там, где важна объяснимость, хорошо себя показывают продвинутые техники рассуждений — пошаговая цепочка и декомпозиция — они повышают прозрачность решения и позволяют проверять ход мысли модели. При этом эффективные системы должны быть не только точными, но и ответственными: речь о прозрачности и справедливости, о защите приватности и постоянном контроле рисков — этика и безопасность не менее важны, чем инженерные решения. 4 | 5 | Переходя от теории к практике, полезно опираться на реальные кейсы: они показывают, что уже работает, где возникают узкие места, как масштабировать решения и как выстраивать обратную связь с пользователями. К лучшим практикам относятся регулярные обновления данных и проверок, валидация входа, логирование и сбор метрик, а также обсуждение решений с сообществом и экспертами — всё это ускоряет итерации и повышает надёжность. Для дальнейшего углубления рекомендуем материалы, которые помогут быстро «приземлить» подходы: документацию OpenAI API с акцентом на запуск, лучшие практики и безопасность; принципы Twelve‑Factor App применительно к конфигурации и хранению секретов вне кода; Panel для Python как удобный путь к сборке интерактивных интерфейсов и экспериментов с LLM; книги и статьи по проектированию чат‑ботов и интеграции ИИ‑компонентов; работы о «практичном ИИ» и инженерном подходе к продуктам на базе LLM. 6 | 7 | В качестве напутствия: развитие в области LLM — это баланс технологий и ответственности. Стройте системы, которые действительно улучшают процессы и опыт пользователей, учитывая последствия решений, потенциальные риски и этические нормы. Осваивайте методики оценки, совершенствуйте промптинг, автоматизируйте проверки качества и не забывайте о человеко‑ориентированном дизайне — именно сочетание этих подходов позволяет делать полезные и безопасные продукты. 
8 | -------------------------------------------------------------------------------- /DEPLOYMENT.md: -------------------------------------------------------------------------------- 1 | # 🚀 Deployment Guide 2 | 3 | ## 📋 Overview 4 | 5 | This multilingual MkDocs site has three deployment options: 6 | 7 | ## 🎯 Option 1: Manual Deployment (Recommended) 8 | 9 | ### Local Testing 10 | ```bash 11 | # Test Russian version 12 | mkdocs serve -f mkdocs-ru.yml -a localhost:8001 13 | 14 | # Test English version 15 | mkdocs serve -f mkdocs-en.yml -a localhost:8002 16 | 17 | # Test main landing page 18 | mkdocs serve 19 | ``` 20 | 21 | ### Build All Versions 22 | ```bash 23 | ./deploy.sh 24 | ``` 25 | 26 | ### Deploy to GitHub Pages 27 | ```bash 28 | ./deploy-github.sh 29 | ``` 30 | 31 | ## 🤖 Option 2: GitHub Actions (Automatic) 32 | 33 | The `.github/workflows/deploy.yml` automatically builds and deploys on every push to main/master. 34 | 35 | **Setup:** 36 | 1. Go to your repo → Settings → Pages 37 | 2. Set Source to "GitHub Actions" 38 | 3. Push to main branch 39 | 4. Site will be available at: `https://yourusername.github.io/repo-name/` 40 | 41 | ## 📁 Option 3: Manual GitHub Pages 42 | 43 | If you prefer the classic `mkdocs gh-deploy`: 44 | 45 | ```bash 46 | # Build all versions first 47 | ./deploy.sh 48 | 49 | # Deploy using subtree 50 | git add site/ 51 | git commit -m "Deploy documentation" 52 | git subtree push --prefix site origin gh-pages 53 | ``` 54 | 55 | ## 🌐 Site Structure 56 | 57 | After deployment, your site will have: 58 | 59 | ``` 60 | https://yourdomain.com/ 61 | ├── index.html # Main landing page with language selection 62 | ├── en/ # English version with full navigation 63 | │ ├── index.html 64 | │ ├── CHAPTER-1/ 65 | │ ├── CHAPTER-2/ 66 | │ └── CHAPTER-3/ 67 | ├── ru/ # Russian version with full navigation 68 | │ ├── index.html 69 | │ ├── CHAPTER-1/ 70 | │ ├── CHAPTER-2/ 71 | │ └── CHAPTER-3/ 72 | └── assets/ # Shared assets 73 | ``` 74 | 75 | ## ✅ What Works After Deployment 76 | 77 | - ✅ Main page with language selection 78 | - ✅ Full Russian navigation at `/ru/` 79 | - ✅ Full English navigation at `/en/` 80 | - ✅ Language switcher in each version 81 | - ✅ Search functionality for each language 82 | - ✅ Mobile-responsive design 83 | - ✅ All Material theme features 84 | 85 | ## 🔧 Configuration Files 86 | 87 | - `mkdocs.yml` - Main landing page 88 | - `mkdocs-en.yml` - English version configuration 89 | - `mkdocs-ru.yml` - Russian version configuration 90 | - `deploy.sh` - Local build script 91 | - `deploy-github.sh` - GitHub deployment script 92 | - `.github/workflows/deploy.yml` - GitHub Actions workflow 93 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-1/Answers 1.6.md: -------------------------------------------------------------------------------- 1 | # Answers 1.6 2 | 3 | ## Theory 4 | 5 | 1. Evaluating LLM answers is necessary to understand effectiveness, alignment with goals, and areas to improve. Evaluate accuracy, relevance, and completeness. 6 | 2. Key metrics: accuracy, recall, F1, and user satisfaction ratings. These guide product development and release decisions. 7 | 3. The path to production is iterative: start with quick prototypes, find gaps, gradually increase complexity and dataset coverage. Practical value matters more than perfection. 8 | 4. High‑stakes scenarios (medicine, law, finance) require stricter validation, bias detection/mitigation, and ethical review. 9 | 5. 
Best practices: start small, iterate quickly, automate testing and quality checks. 10 | 6. Automated tests speed up gold‑standard comparisons, surface errors, and provide continuous feedback. 11 | 7. Choose metrics and rigor to match the application’s goals and risks; use heightened rigor for high stakes. 12 | 8. A full evaluation framework includes a rubric, protocols (who/what/how), and gold‑standard comparison when needed. 13 | 9. Advanced techniques: semantic similarity (embeddings), crowd evaluation, automated coherence/logic checks, and adaptive schemes tailored to the domain. 14 | 10. Continuous evaluation and diverse test cases increase reliability and relevance across scenarios. 15 | 16 | ## Practice (sketches) 17 | 18 | 1. Rubric‑based evaluation function: 19 | ```python 20 | def evaluate_response(response: str, rubric: dict) -> dict: 21 | results = {} 22 | total_weight = sum(rubric[c]['weight'] for c in rubric) 23 | total_score = 0 24 | for criteria, details in rubric.items(): 25 | score = details.get('weight', 1) # stub — replace with real logic 26 | feedback = f"Stub feedback for {criteria}." 27 | results[criteria] = {'score': score, 'feedback': feedback} 28 | total_score += score * details['weight'] 29 | results['overall'] = { 30 | 'weighted_average_score': total_score / total_weight, 31 | 'feedback': 'Overall feedback based on the rubric.' 32 | } 33 | return results 34 | ``` 35 | 36 | 2. Rubric template: 37 | ```python 38 | rubric = { 39 | 'accuracy': {'weight': 3}, 40 | 'relevance': {'weight': 2}, 41 | 'completeness': {'weight': 3}, 42 | 'coherence': {'weight': 2}, 43 | } 44 | ``` 45 | 46 | 3. The ideal (gold) answer serves as a comparison point for weighted scoring and textual feedback. 47 | 48 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-2/2.1 Введение.md: -------------------------------------------------------------------------------- 1 | # 2.1 Введение 2 | 3 | LangChain — открытый фреймворк, который соединяет большие языковые модели (LLM), такие как ChatGPT, с внутренними и персональными данными пользователя, позволяя «разговаривать» с документами и получать ответы из контента, недоступного поисковикам или созданного уже после обучения модели. Идея и реализация принадлежат Харрисону Чейзу (сооснователь и CEO LangChain) и стали важным шагом к тому, чтобы организации и люди могли по‑настоящему использовать свои данные. Суть подхода — демократизировать доступ к информации и превратить «сырые» данные во взаимодействующий диалоговый источник знаний: внутренние отчёты, исследования, личные заметки — всё это теперь можно спрашивать как у ассистента, без SQL‑запросов и ручного поиска по файлам, ускоряя анализ и делая работу с данными заметно эффективнее. 4 | 5 | Архитектура LangChain модульная и приспособлена для сборки и развёртывания LLM‑приложений. В её основе — промпты, задающие инструкции и контекст для релевантной генерации; модели, собственно LLM, которые понимают контекст и формируют человекоподобные ответы; индексы — структуры, ускоряющие индексацию и извлечение данных; цепочки — последовательности шагов обработки, где можно организовать очистку, анализ и финальную сборку ответа; и агенты — «оркестраторы», которые комбинируют инструменты, управляют потоком данных и адаптируют поведение под конкретные задачи. В совокупности эти элементы образуют гибкую платформу, которую легко подстроить под любой ландшафт данных и сценариев. 6 | 7 | Функционально LangChain покрывает полный цикл работы с данными вокруг LLM. 
Поддерживается загрузка документов из разных источников и форматов с настраиваемым доступом и ключами; предусмотрена предварительная обработка — разбиение текстов на семантические «чанки», сохраняющие контекст и улучшающие извлечение; реализован семантический поиск по эмбеддингам и мерам смысловой близости, чтобы взаимодействовать с данными не по ключевым словам, а по сути; для диалоговых сценариев есть «память» — учёт контекста предыдущих сообщений и поддержание связного диалога, которая органично интегрируется в цепочки. Такая комбинация делает LangChain удобным для построения ассистентов, аналитических инструментов и корпоративных ботов, работающих поверх частных хранилищ знаний. 8 | 9 | Для углублённого освоения лучше всего начать с официальной документации и туториалов, воспользоваться поддержкой сообщества и пройти базовый курс по разработке LLM‑приложений на LangChain — это даёт быстрый практический вход и помогает собирать решения, которые эффективно используют внутренние данные в связке с LLM. 10 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-3/3.4 Итоги и размышления.md: -------------------------------------------------------------------------------- 1 | # 3.4 Итоги и размышления 2 | 3 | Мы прошли путь от интеграции LLM в продуктовую разработку и практик LLMOps до оркестрации ML‑воркфлоу в Kubeflow Pipelines и реализации прикладного механизма генерации квизов на базе ИИ — цельной цепочки, показывающей, как инженерные подходы и автоматизация превращают идеи в работающие решения. Ключевой вывод в части LLM — необходимость структурного подхода: осознанный выбор и подготовка моделей, продуманное развертывание с наблюдаемостью, непрерывный мониторинг и поддержка; автоматизация упрощает цикл разработки и обновлений, а грамотный промпт‑менеджмент с динамическими тестами и A/B‑экспериментами критичен для качества. Kubeflow Pipelines демонстрирует, как повышать эффективность и надёжность проектов за счёт воспроизводимых пайплайнов и автоматизации тонкой настройки (вплоть до сценариев вроде PEFT для PaLM 2), что особенно важно при работе с большими и сложными моделями. Квиз‑генератор показал прикладную сторону: подготовка окружения, создание датасета, инженерия промптов и использование LangChain для структурирования подсказок складываются в систему, генерирующую индивидуальные учебные квизы и служащую шаблоном для интерактивных образовательных инструментов. В целом материал подчёркивает трансформационный потенциал LLM и ML‑воркфлоу: следуя передовым практикам LLMOps, используя Kubeflow для автоматизации и реализуя прикладные сценарии, можно ускорять инновации и приносить реальную ценность. На этом пути важны непрерывное обучение, адаптация к новым технологиям и этика разработки ИИ; участие в сообществе и обмен опытом помогают эффективнее справляться с вызовами и использовать новые возможности. Эта глава закладывает основу для дальнейших инноваций в ИИ‑приложениях и даёт стратегические ориентиры, как задействовать новейшие достижения ИИ/ML для решения прикладных задач. 
Для углублённого изучения полезны ресурсы: библиотека Transformers от Hugging Face (https://huggingface.co/transformers/) как комплексная база для трансформеров; введение в MLOps (O’Reilly) (https://www.oreilly.com/library/view/introducing-mlops/9781492083283/) и курс Google Cloud по основам MLOps (https://www.coursera.org/learn/mlops-fundamentals); документация Kubeflow (https://www.kubeflow.org/docs/started/introduction/) и разбор автоматизации пайплайнов (https://towardsdatascience.com/automating-machine-learning-pipelines-with-kubeflow-342fb3e7bbd8); материалы по ИИ в образовании от ЮНЕСКО (https://unesdoc.unesco.org/ark:/48223/pf0000374266) и монография о вызовах/возможностях (https://link.springer.com/book/10.1007/978-3-030-52240-7); руководства по этике ИИ от IBM (https://www.ibm.com/cloud/learn/ethics-in-ai) и инициативы Лиги алгоритмической справедливости (https://www.ajl.org/); а также обзоры по интерактивному обучению и квизам (https://www.edutopia.org/article/creating-educational-quizzes-ai-opportunities-and-challenges) и платформа Quizlet (https://quizlet.com/). 4 | -------------------------------------------------------------------------------- /docs/CHAPTER-1/Answers 1.2.md: -------------------------------------------------------------------------------- 1 | # Answers 1.2 2 | 3 | ## Theory 4 | 1. The key components of a message in the context of interacting with OpenAI's GPT models are `role` and `content`. The `role` specifies whether the message is from the system or the user, guiding the AI on how to frame its response. Distinguishing between them is important for simulating a dynamic exchange and for the AI to understand and respond appropriately to the task at hand. 5 | 6 | 2. 'System' messages provide instructions, context, or constraints, shaping the AI's behavior, personality, or response style. 'User' messages, on the other hand, are inputs from the user's perspective, such as queries or statements, that the AI responds to. The distinction helps in crafting interactions that elicit desired responses from the AI. 7 | 8 | 3. An example of how a 'system' message can dictate the AI's behavior is instructing the AI to respond in the style of a whimsical poet. This message sets the tone and style for the AI's responses, ensuring they match the whimsical, poetic context requested by the user. 9 | 10 | 4. The sequence of messages influences the AI model's response by providing a contextually rich background for its replies. It ensures that the AI's responses are aligned with both the direct inputs from the user and the overarching instructions or context provided by the system, enabling more nuanced conversations. 11 | 12 | 5. The categories available for classifying customer feedback in the provided example are "Positive", "Negative", or "Neutral". This classification helps in understanding customer satisfaction and areas of improvement. 13 | 14 | 6. Classifying the sentiment of a movie review could be beneficial for aggregating consumer opinions on films, helping potential viewers make informed decisions. Categories for classification could include "Positive", "Negative", and "Neutral". 15 | 16 | 7. Classifying the topic of a news article helps in content management by organizing articles into categories for easier navigation and in recommendation systems by suggesting articles of interest to readers. Examples of categories include "Politics", "Technology", "Sports", and "Entertainment". 17 | 18 | 8. 
Classifying customer inquiries is crucial in a business setting to efficiently direct queries to the appropriate department, improving response times and customer satisfaction. Categories could include "Billing", "Technical Support", "Sales", and "General Inquiry". 19 | 20 | 9. The 'user_message' in AI classification tasks should contain the text that needs to be classified. It should be structured clearly and concisely to provide the AI with enough context to make an accurate classification into predefined categories. 21 | 22 | 10. Classifying the tone of social media posts benefits content moderation by identifying and managing inappropriate content and informs marketing strategies by analyzing audience engagement. Tone categories could include "Serious", "Humorous", "Inspirational", and "Angry". 23 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-3/3.4 Conclusions and Reflections.md: -------------------------------------------------------------------------------- 1 | # 3.4 Conclusions and Reflections 2 | 3 | We have traveled from integrating LLMs into product development and adopting LLMOps practices to orchestrating ML workflows in Kubeflow Pipelines and implementing an applied AI‑based quiz generation mechanism—a coherent chain showing how engineering approaches and automation turn ideas into working solutions. The key takeaway for the LLM part is the need for a structured approach: deliberate model selection and preparation, thoughtful deployment with observability, continuous monitoring and maintenance; automation streamlines the development and update cycle, and sound prompt management with dynamic tests and A/B experiments is critical for quality. Kubeflow Pipelines demonstrates how to boost efficiency and reliability via reproducible pipelines and automation of fine‑tuning (up to scenarios like PEFT for PaLM 2), which is especially important when working with large and complex models. The quiz generator highlighted the applied side: environment setup, dataset creation, prompt engineering, and using LangChain to structure prompts all come together into a system that generates personalized learning quizzes and serves as a template for interactive educational tools. Overall, the material underscores the transformational potential of LLMs and ML workflows: by following LLMOps best practices, using Kubeflow for automation, and implementing applied scenarios, we can accelerate innovation and deliver real value. Along the way, continuous learning, adaptation to new technologies, and AI development ethics are essential; involvement in the community and knowledge sharing help deal more effectively with challenges and leverage new opportunities. This chapter lays the groundwork for further innovation in AI applications and offers strategic guidance on harnessing the latest AI/ML advances to solve practical problems. 
For further study, useful resources include: the Transformers library from Hugging Face (https://huggingface.co/transformers/) as a comprehensive base for transformers; Introducing MLOps (O’Reilly) (https://www.oreilly.com/library/view/introducing-mlops/9781492083283/) and Google Cloud’s course on MLOps fundamentals (https://www.coursera.org/learn/mlops-fundamentals); Kubeflow documentation (https://www.kubeflow.org/docs/started/introduction/) and an overview of automating pipelines (https://towardsdatascience.com/automating-machine-learning-pipelines-with-kubeflow-342fb3e7bbd8); materials on AI in education from UNESCO (https://unesdoc.unesco.org/ark:/48223/pf0000374266) and a monograph on challenges/opportunities (https://link.springer.com/book/10.1007/978-3-030-52240-7); AI ethics guides from IBM (https://www.ibm.com/cloud/learn/ethics-in-ai) and the Algorithmic Justice League initiative (https://www.ajl.org/); as well as overviews of interactive learning and quizzes (https://www.edutopia.org/article/creating-educational-quizzes-ai-opportunities-and-challenges) and the Quizlet platform (https://quizlet.com/). 4 | 5 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-1/Ответы 1.6.md: -------------------------------------------------------------------------------- 1 | # Ответы 1.6 2 | 3 | ## Теория 4 | 5 | 1. **Оценка ответов LLM** необходима для понимания их эффективности, соответствия поставленным целям и выявления областей для улучшения. Оценивать следует по точности, релевантности и полноте. 6 | 2. **Ключевые метрики**: точность, полнота, F1-мера и рейтинги удовлетворённости пользователей. Эти показатели помогают направлять развитие продукта и принимать решения о выпуске изменений. 7 | 3. **Переход к продакшену** — это итеративный процесс: от быстрых прототипов к выявлению недостатков, постепенному усложнению и расширению датасетов. При этом важна практическая ценность, а не достижение «идеальности». 8 | 4. Для **high-stakes сценариев** (медицина, право, финансы) требуется строгая проверка, расширенные валидации, обнаружение и смягчение предвзятости (bias), а также этическая экспертиза. 9 | 5. **Лучшие практики**: начинать с малого, быстро итерировать, автоматизировать тестирование и контроль качества. 10 | 6. **Автоматизированные тесты** ускоряют сравнение с эталоном, помогают выявлять ошибки и обеспечивают непрерывную обратную связь. 11 | 7. **Подбор метрик и строгости проверки** должен соответствовать целям и рискам приложения; для high-stakes сценариев требуется повышенная строгость. 12 | 8. **Полноценный фреймворк оценки** включает: рубрику критериев, протоколы (кто, как и чем оценивает), а также сравнение с эталоном при необходимости. 13 | 9. **Продвинутые техники оценки**: семантическая близость (с использованием эмбеддингов), крауд-оценка, автоматические проверки когерентности и логики, а также адаптивные схемы оценки под конкретный домен. 14 | 10. **Непрерывная оценка** и использование разнообразных тест-кейсов повышают надёжность и релевантность ответов в различных сценариях. 15 | 16 | ## Практика (эскизы) 17 | 18 | 1. **Функция оценки по рубрике**: 19 | ```python 20 | def evaluate_response(response: str, rubric: dict) -> dict: 21 | results = {} 22 | total_weight = sum(rubric[c]['weight'] for c in rubric) 23 | total_score = 0 24 | for criteria, details in rubric.items(): 25 | score = details.get('weight', 1) # заглушка — замените на реальную логику 26 | feedback = f"Заглушка обратной связи для {criteria}." 
27 | results[criteria] = {'score': score, 'feedback': feedback} 28 | total_score += score * details['weight'] 29 | results['overall'] = { 30 | 'weighted_average_score': total_score / total_weight, 31 | 'feedback': 'Общая обратная связь на основе рубрики.' 32 | } 33 | return results 34 | ``` 35 | 36 | 2. **Заготовка рубрики**: 37 | ```python 38 | rubric = { 39 | 'accuracy': {'weight': 3}, 40 | 'relevance': {'weight': 2}, 41 | 'completeness': {'weight': 3}, 42 | 'coherence': {'weight': 2}, 43 | } 44 | ``` 45 | 46 | 3. **Идеальный ответ как эталон** — это сравнение по критериям с взвешиванием и текстовой обратной связью. 47 | 48 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-1/Ответы 1.4.md: -------------------------------------------------------------------------------- 1 | # Ответы 1.4 2 | 3 | ## Теория 4 | 5 | 1. Цепочка рассуждений (Chain of Thought, CoT) разбивает решение задачи на последовательные шаги, что повышает точность и делает понятным ход принятия решения. 6 | 2. Прозрачность CoT позволяет пользователю видеть логику модели, что укрепляет доверие к ней. 7 | 3. В обучении CoT имитирует работу «наставника»: ведёт по шагам, развивая критическое мышление. 8 | 4. В поддержке клиентов CoT помогает разбирать сложные запросы и поэтапно приходить к точному ответу, снижая нагрузку на операторов. 9 | 5. «Внутренний монолог» (Inner Monologue) скрывает промежуточные рассуждения и показывает только результат — в отличие от CoT, где шаги видны пользователю. 10 | 6. Для фильтрации конфиденциальной информации Inner Monologue полезен, поскольку исключает случайное раскрытие деталей. 11 | 7. В сценариях «управляемого обучения» Inner Monologue позволяет давать подсказки без «спойлеров» полного решения. 12 | 8. Подготовка окружения включает загрузку ключа OpenAI и импорт необходимых библиотек Python. 13 | 9. Функция `get_response_for_queries` отправляет промпты в API и возвращает ответ модели, инкапсулируя логику взаимодействия. 14 | 10. CoT-промптинг направляет модель по заданным шагам в тех случаях, где прямой ответ неочевиден или требует сложной логики. 15 | 11. В поддержке клиентов структура system/user-промптов направляет рассуждение для получения детальных ответов о товарах. 16 | 12. При использовании Inner Monologue извлекается только финальная часть ответа — это позволяет сохранить лаконичность и понятность интерфейса для пользователя. 17 | 18 | ## Практика 19 | 20 | **Задание 1: CoT-промптинг — Детальный ответ о продукте** 21 | 22 | 1. **Создание функции `detailed_product_info_cot`**: Необходимо написать функцию, которая принимает `product_name` и `user_question`, используя CoT для последовательного построения детального ответа. 23 | 2. **Шаги CoT**: 24 | * **Шаг 1: Идентификация продукта**: Модель должна определить, о каком продукте идёт речь. 25 | * **Шаг 2: Сбор основной информации**: Модель собирает ключевые характеристики продукта (например, тип, функции, преимущества). 26 | * **Шаг 3: Формирование ответа на вопрос пользователя**: Модель использует собранные данные для ответа на конкретный вопрос `user_question`, сохраняя логику и последовательность. 27 | 28 | **Задание 2: Inner Monologue — Краткий итоговый ответ** 29 | 30 | 1. **Создание функции `concise_product_summary_inner_monologue`**: Разработайте функцию, которая также принимает `product_name` и `user_question`, но использует Inner Monologue для формирования краткого ответа. 31 | 2. 
**Шаги Inner Monologue**: 32 | * **Внутренний этап**: Модель проводит те же шаги, что и в CoT (идентификация продукта, сбор информации, формирование ответа), но эти промежуточные рассуждения не выводятся. 33 | * **Финальный этап**: Модель возвращает только итоговый, лаконичный ответ на `user_question`. 34 | 3. **Сравнение**: В итоге сравните ответы двух функций — `detailed_product_info_cot` и `concise_product_summary_inner_monologue` — и объясните разницу в их применении. 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 📖 LLMOps: Make AI Work For You 2 | 3 | > A comprehensive, hands-on guide for data scientists, ML engineers, and software developers to build real products with Large Language Models. 4 | 5 | [![GitHub Pages](https://img.shields.io/badge/docs-GitHub%20Pages-blue)](https://boramorka.github.io/LLM-Book/) 6 | [![License](https://img.shields.io/badge/license-MIT-green)](LICENSE) 7 | [![MkDocs](https://img.shields.io/badge/docs-MkDocs-blue)](https://www.mkdocs.org/) 8 | 9 | ## 🌍 Available Languages 10 | 11 | - **🇺🇸 [English](https://boramorka.github.io/LLM-Book/en/)** - Full English documentation 12 | - **🇷🇺 [Русский](https://boramorka.github.io/LLM-Book/ru/)** - Полная русская документация 13 | 14 | ## 📚 About the Book 15 | 16 | This book is a practical guide by **Nikita Goryachev** covering: 17 | 18 | ### 📖 Chapter 1: OpenAI API Fundamentals 19 | - Introduction to ChatGPT API capabilities, classification, and applications 20 | - Advanced moderation and machine reasoning techniques 21 | - Prompt chaining strategies 22 | - Building and evaluating LLM applications 23 | 24 | ### 🔗 Chapter 2: Conversational Chatbots with LangChain 25 | - Practical chatbot development with LangChain 26 | - Environment setup to advanced retrieval 27 | - Context and dialogue memory for human-like interactions 28 | 29 | ### ⚙️ Chapter 3: LLMOps 30 | - Structured guide to integrating LLMs into development workflows 31 | - Model selection, tuning, deployment, and monitoring 32 | - Automation, best practices, and ethical considerations 33 | 34 | ## 🚀 Quick Start 35 | 36 | ### Reading Online 37 | Visit our documentation site: **[boramorka.github.io/LLM-Book](https://boramorka.github.io/LLM-Book/)** 38 | 39 | ### Local Development 40 | 41 | ```bash 42 | # Clone the repository 43 | git clone https://github.com/boramorka/LLM-Book.git 44 | cd LLM-Book 45 | 46 | # Install dependencies 47 | pip install mkdocs mkdocs-material 48 | 49 | # Serve English version 50 | mkdocs serve -f mkdocs-en.yml -a localhost:8002 51 | 52 | # Serve Russian version 53 | mkdocs serve -f mkdocs-ru.yml -a localhost:8001 54 | ``` 55 | 56 | ### Building All Versions 57 | 58 | ```bash 59 | # Build all language versions 60 | ./deploy.sh 61 | 62 | # Deploy to GitHub Pages 63 | ./deploy-github.sh 64 | ``` 65 | 66 | ## 👨‍💻 About the Author 67 | 68 | **Nikita Goryachev** is a Senior AI/ML Engineer at Sber, leading a team that implements state-of-the-art algorithms in NLP and recommendation systems. The team organizes industry meetups, participates in conferences (RecSys in Singapore, AI Journey in Moscow), and develops RePlay — an open-source library for recommendations. 69 | 70 | ## 🤝 Contributing 71 | 72 | This book is open for contributions! 
Feel free to: 73 | 74 | - Report issues or typos 75 | - Suggest improvements 76 | - Submit translations to other languages 77 | - Add practical examples 78 | 79 | ## 📄 License 80 | 81 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 82 | 83 | ## 🔗 Links 84 | 85 | - 📖 **Documentation**: [boramorka.github.io/LLM-Book](https://boramorka.github.io/LLM-Book/) 86 | - 🐙 **Repository**: [github.com/boramorka/LLM-Book](https://github.com/boramorka/LLM-Book) 87 | - 💼 **Author**: [Nikita Goryachev](https://github.com/boramorka) 88 | 89 | --- 90 | 91 | ⭐ **Star this repository if it helped you!** -------------------------------------------------------------------------------- /mkdocs-ru.yml: -------------------------------------------------------------------------------- 1 | site_name: LLMOps. Make AI Work For You. 2 | site_url: https://boramorka.github.io/LLM-book/ru/ 3 | repo_url: https://github.com/boramorka/LLM-book 4 | repo_name: boramorka/LLM-book 5 | 6 | theme: 7 | name: material 8 | features: 9 | - navigation.sections 10 | - navigation.expand 11 | - navigation.indexes 12 | - navigation.top 13 | - search.suggest 14 | - search.highlight 15 | - content.tabs.link 16 | - content.code.annotation 17 | - content.code.copy 18 | palette: 19 | primary: black 20 | language: ru 21 | 22 | docs_dir: docs/ru 23 | site_dir: site/ru 24 | 25 | markdown_extensions: 26 | - meta 27 | - pymdownx.highlight 28 | - pymdownx.superfences 29 | - pymdownx.tasklist: 30 | custom_checkbox: true 31 | 32 | plugins: 33 | - search: 34 | lang: 35 | - ru 36 | 37 | extra: 38 | alternate: 39 | - name: English 40 | link: ../en/ 41 | lang: en 42 | - name: Русский 43 | link: ./ 44 | lang: ru 45 | 46 | nav: 47 | - Обзор: index.md 48 | - ГЛАВА-1. OPEN AI API: 49 | - 1.1 Введение: 'CHAPTER-1/1.1 Введение.md' 50 | - 1.2 Классификация: 'CHAPTER-1/1.2 Классификация.md' 51 | - 1.3 Advanced Moderation: 'CHAPTER-1/1.3 Advanced Moderaton.md' 52 | - 1.4 Продвинутое машинное рассуждение: 'CHAPTER-1/1.4 Продвинутое машинное рассуждение.md' 53 | - 1.5 Сила чейнинга промптов: 'CHAPTER-1/1.5 Сила чейнинга промптов.md' 54 | - 1.6 Построение и оценка LLM-приложений: 'CHAPTER-1/1.6 Построение и оценка LLM-приложений.md' 55 | - 1.7 Итоги и размышления: 'CHAPTER-1/1.7 Итоги и размышления.md' 56 | - ГЛАВА-2. LANGCHAIN: 57 | - 2.1 Введение: 'CHAPTER-2/2.1 Введение.md' 58 | - 2.2 Загрузчики документов LangChain: 'CHAPTER-2/2.2 Загрузчики документов LangChain.md' 59 | - 2.3 Углубление в текстовое разбиение: 'CHAPTER-2/2.3 Углубление в текстовое разбиение.md' 60 | - 2.4 Сила эмбеддингов: 'CHAPTER-2/2.4 Сила эмбеддингов.md' 61 | - 2.5 Семантический поиск. Продвинутые стратегии: 'CHAPTER-2/2.5 Семантический поиск. Продвинутые стратегии.md' 62 | - 2.6 RAG. Техники для QA: 'CHAPTER-2/2.6 RAG. Техники для QA.md' 63 | - 2.7 Чат-боты на LangChain: 'CHAPTER-2/2.7 Чат-боты на LangChain.md' 64 | - 2.8 Итоги и размышления: 'CHAPTER-2/2.8 Итоги и размышления.md' 65 | - ГЛАВА-3. 
LLMOPS: 66 | - 3.1 Введение: 'CHAPTER-3/3.1 Введение.md' 67 | - 3.2 Воркфлоу с Kubeflow Pipelines: 'CHAPTER-3/3.2 Воркфлоу с Kubeflow Pipelines.md' 68 | - 3.3 Механизм генерации квиза ИИ: 'CHAPTER-3/3.3 Механизм генерации квиза ИИ.md' 69 | - 3.4 Итоги и размышления: 'CHAPTER-3/3.4 Итоги и размышления.md' 70 | - Ответы: 71 | - 1.1 Введение: 'CHAPTER-1/Ответы 1.1.md' 72 | - 1.2 Классификация: 'CHAPTER-1/Ответы 1.2.md' 73 | - 1.3 Advanced Moderation: 'CHAPTER-1/Ответы 1.3.md' 74 | - 1.4 Продвинутое рассуждение: 'CHAPTER-1/Ответы 1.4.md' 75 | - 1.5 Чейнинг промптов: 'CHAPTER-1/Ответы 1.5.md' 76 | - 1.6 Построение и оценка: 'CHAPTER-1/Ответы 1.6.md' 77 | - 2.2 Загрузчики документов: 'CHAPTER-2/Ответы 2.2.md' 78 | - 2.3 Разбиение текста: 'CHAPTER-2/Ответы 2.3.md' 79 | - 2.4 Эмбеддинги: 'CHAPTER-2/Ответы 2.4.md' 80 | - 2.5 Семантический поиск: 'CHAPTER-2/Ответы 2.5.md' 81 | - 2.6 RAG: 'CHAPTER-2/Ответы 2.6.md' 82 | - 2.7 Чат-боты: 'CHAPTER-2/Ответы 2.7.md' 83 | - 3.2 Kubeflow Pipelines: 'CHAPTER-3/Ответы 3.2.md' 84 | - 3.3 Генерация квиза: 'CHAPTER-3/Ответы 3.3.md' 85 | -------------------------------------------------------------------------------- /docs/CHAPTER-2/2.8 Summary and Reflections.md: -------------------------------------------------------------------------------- 1 | # 2.8 Summary and Reflections 2 | 3 | The journey of creating conversational chatbots using LangChain, as detailed in the comprehensive guide, represents a significant stride towards enhancing interactive user interfaces with a deep understanding of natural language. This chapter not only illuminated the steps necessary for building such advanced systems but also emphasized the intricate dance between technological innovation and user experience design. 4 | 5 | ### The Road to Conversational AI 6 | 7 | The progression from setting up the environment, through loading documents and creating a vector store, to the implementation of advanced retrieval techniques, underlines the multifaceted nature of chatbot development. Each phase of the process builds upon the previous, laying a foundation for a chatbot capable of contextual understanding and dynamic interactions. The use of LangChain as a bridge to connect language models with proprietary or personal data is a testament to the evolving landscape of AI, where accessibility and integration of data become paramount. 8 | 9 | ### Enhancing Interactivity through Memory and Context 10 | 11 | One of the most notable advancements discussed in the chapter is the incorporation of conversational context and memory. This approach marks a departure from static, one-off interactions, moving towards a more fluid dialogue where the chatbot not only responds to immediate queries but also understands the thread of the conversation. Implementing conversation buffer memory and building a conversational retrieval chain highlight the intricate work behind making chatbots not just responsive but contextually aware. 12 | 13 | ### Technical Sophistication Meets User Centricity 14 | 15 | As the chapter traverses the technical aspects of chatbot development, from loading documents to creating conversational retrieval chains, it also underscores the importance of user-centric design. The meticulous detailing of processes, combined with practical examples and code snippets, exemplifies how technical sophistication can be harnessed to meet user needs. 
The focus on conversational memory, in particular, showcases an acute awareness of the nuances of human dialogue, which is crucial for creating engaging and meaningful user experiences. 16 | 17 | ### Reflections on the Evolution of Conversational AI 18 | 19 | Reflecting on the content of the chapter, it is evident that the development of conversational chatbots is not just a technical endeavor but a holistic approach to enhancing how we interact with technology. The interplay between advanced retrieval techniques, contextual understanding, and memory integration reveals the depth of thought and innovation driving the evolution of conversational AI. The journey from conceptualization to implementation encapsulated in this chapter serves as both a guide and an inspiration for those venturing into the realm of chatbot development. 20 | 21 | ### Towards a Future of Enhanced Digital Interactions 22 | 23 | In conclusion, this chapter not only provides a blueprint for building conversational chatbots but also paints a vision for the future of digital interactions. The advancements in conversational AI, as demonstrated through the development process using LangChain, open new avenues for creating more intuitive, responsive, and human-like interfaces. As we move forward, the fusion of technical innovation with a keen understanding of user needs will undoubtedly shape the next generation of digital experiences, making technology more accessible, engaging, and, ultimately, more human. -------------------------------------------------------------------------------- /docs/en/CHAPTER-3/3.1 Introduction.md: -------------------------------------------------------------------------------- 1 | # 3.1 Introduction 2 | 3 | Bringing large language models into the software development process is the next turn in the evolution of AI products. This section is a practical introduction to LLMOps, covering the full lifecycle of LLM‑based applications: from model selection and fine‑tuning to production deployment, monitoring, and ongoing operations. LLMs understand and generate human‑like text, so they are used for summarization, classification, content generation, and many other tasks. Their strengths are broad knowledge from training on large corpora, adaptability to a wide range of scenarios without heavy task‑specific training, and the ability to work with context and capture nuance. Building on this, LLMOps acts as the LLM‑focused layer of MLOps: model selection and domain preparation, thoughtful deployment to meet SLAs, continuous monitoring with metrics and alerts, plus security and privacy with ethical principles and data protection. 4 | 5 | An LLMOps roadmap typically includes several steps. First, choose a model by size, training data, and benchmarks: match metrics to your task and prepare a fine‑tuning dataset that faithfully reflects the domain and goals. Next, design deployment architecture and infrastructure: plan for scale with headroom for peaks, minimize latency via caching and shorter execution paths, and account for integrations. In production, rely on continuous monitoring to catch degradation and data drift; define KPI/SLI up front, and bake in regular updates and regression tests. Throughout, protect privacy and security: anonymize sensitive fields, control access to models, prevent abuse, and formalize a responsible‑AI policy. 
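To make the monitoring and privacy steps above concrete, the following is a minimal sketch rather than a recommended production setup: it masks obvious PII (e-mail addresses) before a prompt leaves your service and records latency and token usage against a simple latency budget. The OpenAI-style client call mirrors the style used elsewhere in the book; the helper names, the regular expression, and the 2-second budget are illustrative assumptions.

```python
import os
import re
import time

from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Illustrative SLI: flag any call that exceeds this latency budget (seconds).
LATENCY_BUDGET_S = 2.0

EMAIL_RE = re.compile(r"[\w.+-]+@[\w-]+\.[\w.-]+")

def anonymize(text: str) -> str:
    """Mask obvious PII (here: e-mail addresses) before the prompt leaves the service."""
    return EMAIL_RE.sub("<email>", text)

def monitored_completion(prompt: str, model: str = "gpt-4o-mini") -> str:
    """Send a single prompt and record latency and token usage for monitoring."""
    started = time.perf_counter()
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": anonymize(prompt)}],
    )
    latency = time.perf_counter() - started

    # In production these numbers would be shipped to a metrics backend and alerted on.
    usage = response.usage
    print(f"latency={latency:.2f}s "
          f"prompt_tokens={usage.prompt_tokens} completion_tokens={usage.completion_tokens}")
    if latency > LATENCY_BUDGET_S:
        print("WARNING: latency budget exceeded; consider caching or a smaller model.")

    return response.choices[0].message.content
```

A call such as `monitored_completion("Summarize the ticket from user@example.com")` would emit one latency/token line and replace the address with `<email>` before the text reaches the model; in a real deployment the printed metrics would feed dashboards and alerts tied to your KPI/SLI definitions.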
6 | 7 | An LLM app’s structure typically involves selection and fine‑tuning: evaluate available options and their fit to your requirements, then adapt the model to your domain using prompt engineering, PEFT/LoRA, and other methods — paying attention to infrastructure compatibility and to the cost/efficiency balance of tuning techniques. Deployment is often a REST API around the model or an orchestrator; observability and real‑time metric tracking are critical to understand model health and react quickly to incidents. Automate anything repetitive: prompt management with versioning and A/B tests, automated tests and CI/CD, orchestration of multi‑step LLM chains and their dependencies. Data preparation underpins effective tuning: use SQL/ETL and open tooling to build clean data marts; orchestrate complex workflows to meet SLAs, with retries and idempotency as first‑class properties. 8 | 9 | Best practices rest on three pillars: automation (tests and CI/CD speed up reliable releases), prompt management (context‑aware dynamics and steady A/B testing improve quality), and case‑by‑case scaling (a modular architecture that adds new scenarios without breaking existing ones, and capacity planning for load). Given how fast LLMs and MLOps change, build in flexibility: follow trends, engage with the community, and regularly take courses and workshops. 10 | 11 | From practice: automating support with an LLM chatbot plus dynamic prompt management reduces response time and improves service quality; in publishing, a summarization‑and‑editing pipeline together with prompt management radically speeds article production. Overall, a structured approach to LLMOps — with automation, solid prompt management, thoughtful scalability, and a culture of continuous learning — is key to building and operating successful LLM applications. For deeper study, keep these at hand: WhyLabs’ “A Guide to LLMOps” with material on prompts, evaluation, testing, and scaling; Weights & Biases “Understanding LLMOps” — a review of open and proprietary LLMs with monitoring practices; and the DataRobot AI Wiki, which positions LLMOps as a subset of MLOps and covers adjacent topics. 12 | 13 | -------------------------------------------------------------------------------- /mkdocs-en.yml: -------------------------------------------------------------------------------- 1 | site_name: LLMOps. Make AI Work For You. 2 | site_url: https://boramorka.github.io/LLM-book/en/ 3 | repo_url: https://github.com/boramorka/LLM-book 4 | repo_name: boramorka/LLM-book 5 | 6 | theme: 7 | name: material 8 | features: 9 | - navigation.sections 10 | - navigation.expand 11 | - navigation.indexes 12 | - navigation.top 13 | - search.suggest 14 | - search.highlight 15 | - content.tabs.link 16 | - content.code.annotation 17 | - content.code.copy 18 | palette: 19 | primary: black 20 | language: en 21 | 22 | docs_dir: docs/en 23 | site_dir: site/en 24 | 25 | markdown_extensions: 26 | - meta 27 | - pymdownx.highlight 28 | - pymdownx.superfences 29 | - pymdownx.tasklist: 30 | custom_checkbox: true 31 | 32 | plugins: 33 | - search: 34 | lang: 35 | - en 36 | 37 | extra: 38 | alternate: 39 | - name: English 40 | link: ./ 41 | lang: en 42 | - name: Русский 43 | link: ../ru/ 44 | lang: ru 45 | 46 | nav: 47 | - Overview: index.md 48 | - CHAPTER-1. 
OPEN AI API: 49 | - 1.1 Introduction: 'CHAPTER-1/1.1 Introduction.md' 50 | - 1.2 Classification: 'CHAPTER-1/1.2 Classification.md' 51 | - 1.3 Advanced Moderation: 'CHAPTER-1/1.3 Advanced Moderation.md' 52 | - 1.4 Elevating Machine Reasoning: 'CHAPTER-1/1.4 Advanced Machine Reasoning.md' 53 | - 1.5 The Power of Prompt Chaining: 'CHAPTER-1/1.5 The Power of Prompt Chaining.md' 54 | - 1.6 Building and Evaluating LLM Applications: 'CHAPTER-1/1.6 Building and Evaluating LLM Applications.md' 55 | - 1.7 Summary and Reflections: 'CHAPTER-1/1.7 Takeaways and Reflections.md' 56 | - CHAPTER-2. LANGCHAIN: 57 | - 2.1 Introduction: 'CHAPTER-2/2.1 Introduction.md' 58 | - 2.2 LangChain Document Loaders: 'CHAPTER-2/2.2 LangChain Document Loaders.md' 59 | - 2.3 Deep Dive into Text Splitting: 'CHAPTER-2/2.3 Deep Dive into Text Splitting.md' 60 | - 2.4 The Power of Embeddings: 'CHAPTER-2/2.4 The Power of Embeddings.md' 61 | - 2.5 Semantic Search. Advanced Retrieval Strategies: 'CHAPTER-2/2.5 Semantic Search — Advanced Strategies.md' 62 | - 2.6 RAG Systems. Techniques for Question Answering: 'CHAPTER-2/2.6 RAG — Techniques for QA.md' 63 | - 2.7 Building Chatbots with LangChain: 'CHAPTER-2/2.7 Chatbots with LangChain.md' 64 | - 2.8 Summary and Reflections: 'CHAPTER-2/2.8 Takeaways and Reflections.md' 65 | - CHAPTER-3. LLMOPS: 66 | - 3.1 Introduction: 'CHAPTER-3/3.1 Introduction.md' 67 | - 3.2 Mastering LLM Workflows with Kubeflow Pipelines: 'CHAPTER-3/3.2 Workflow with Kubeflow Pipelines.md' 68 | - 3.3 Implementing the AI Quiz Generation Mechanism: 'CHAPTER-3/3.3 AI Quiz Generation Mechanism.md' 69 | - 3.4 Summary and Reflections: 'CHAPTER-3/3.4 Takeaways and Reflections.md' 70 | - Answers: 71 | - 1.1 Introduction: 'CHAPTER-1/Answers 1.1.md' 72 | - 1.2 Classification: 'CHAPTER-1/Answers 1.2.md' 73 | - 1.3 Advanced Moderation: 'CHAPTER-1/Answers 1.3.md' 74 | - 1.4 Elevating Machine Reasoning: 'CHAPTER-1/Answers 1.4.md' 75 | - 1.5 The Power of Prompt Chaining: 'CHAPTER-1/Answers 1.5.md' 76 | - 1.6 Building and Evaluating LLM Applications: 'CHAPTER-1/Answers 1.6.md' 77 | - 2.2 LangChain Document Loaders: 'CHAPTER-2/Answers 2.2.md' 78 | - 2.3 Deep Dive into Text Splitting: 'CHAPTER-2/Answers 2.3.md' 79 | - 2.4 The Power of Embeddings: 'CHAPTER-2/Answers 2.4.md' 80 | - 2.5 Semantic Search. Advanced Retrieval Strategies: 'CHAPTER-2/Answers 2.5.md' 81 | - 2.6 RAG Systems. Techniques for Question Answering: 'CHAPTER-2/Answers 2.6.md' 82 | - 2.7 Building Chatbots with LangChain: 'CHAPTER-2/Answers 2.7.md' 83 | - 3.2 Mastering LLM Workflows with Kubeflow Pipelines: 'CHAPTER-3/Answers 3.2.md' 84 | - 3.3 Implementing the AI Quiz Generation Mechanism: 'CHAPTER-3/Answers 3.3.md' 85 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-3/3.1 Введение.md: -------------------------------------------------------------------------------- 1 | # 3.1 Введение 2 | 3 | Интеграция больших языковых моделей в процессы разработки — следующий виток эволюции ИИ‑продуктов. Этот раздел — практическое введение в LLMOps, охватывающее полный жизненный цикл приложений на базе LLM: от выбора и донастройки модели до продакшен‑деплоя, мониторинга и регулярной поддержки. 
Сами LLM понимают и генерируют текст, близкий к человеческому, поэтому применяются для суммаризации, классификации, генерации контента и множества других задач; их сила — в обширных знаниях благодаря обучению на больших корпусах, адаптивности к широкому спектру сценариев без жёсткой узкоспециализированной подготовки и умении работать с контекстом, улавливая нюансы. На этой основе LLMOps выступает специализированным слоем MLOps: сюда входят выбор и подготовка модели под домен, продуманное развертывание под заданные SLA, непрерывный мониторинг с метриками и алертами, а также безопасность и приватность — с этическими принципами и защитой данных. 4 | 5 | Дорожная карта LLMOps складывается из нескольких шагов. Сначала — выбор модели по размерам, данным обучения и бенчмаркам: сопоставляйте метрики именно вашей задаче и готовьте датасет для тюнинга, который правдиво отражает домен и цели. Далее — архитектура и инфраструктура развертывания: планируйте масштабирование с запасом под пики, минимизируйте задержки через кэширование и сокращение путей выполнения, учитывайте интеграции. На продакшене — непрерывный мониторинг, позволяющий замечать деградации и сдвиги данных; заранее договоритесь о KPI/SLI, закладывайте регулярные обновления и регрессионные тесты. И всё время — соблюдайте приватность и безопасность: анонимизируйте чувствительные поля, разграничивайте доступ к моделям, предотвращайте злоупотребления и формализуйте политику ответственного ИИ. 6 | 7 | Структура разработки LLM‑приложений обычно включает выбор и донастройку: сначала оцените доступные варианты и их соответствие требованиям, затем адаптируйте модель под специфику вашего домена с помощью инженерии промптов, PEFT/LoRA и других подходов — обращая внимание на совместимость с инфраструктурой и на баланс стоимости/эффективности методов тюнинга. Развертывание часто реализуют через REST API вокруг модели или оркестратора; критично обеспечить наблюдаемость и трекинг метрик в реальном времени, чтобы видеть состояние модели и быстро реагировать на инциденты. Автоматизируйте всё, что повторяется: управление промптами с версионированием и A/B‑тестами, автотесты и CI/CD, оркестрацию многошаговых цепочек вызовов LLM и их зависимостей. Подготовка данных — фундамент эффективности тюнинга: используйте SQL/ETL и открытые инструменты для аккуратных витрин; оркестрируйте сложные воркфлоу так, чтобы соблюдались SLA, а повторные попытки и идемпотентность были встроенными свойствами процессов. 8 | 9 | Лучшие практики сводятся к трём опорам: автоматизация (автотесты и CI/CD дают скорость и стабильность релизов), управление промптами (динамика под контекст и планомерные A/B‑тесты повышают качество), масштабирование по кейсам (модульная архитектура, добавляющая новые сценарии без поломки существующих, и планирование ресурсных квот под нагрузку). Учитывая, как быстро меняются LLM и MLOps, закладывайте гибкость: следите за трендами, общайтесь с сообществом, регулярно проходите курсы и воркшопы. 10 | 11 | Из практики: автоматизация поддержки через LLM‑чат‑бота с динамическим управлением промптами сокращает время ответа и повышает качество сервиса; в издательских платформах пайплайн суммаризации и редактирования вместе с управлением промптами радикально ускоряет подготовку материалов. В целом, структурный подход к LLMOps — с автоматизацией, грамотным промпт‑менеджментом, продуманной масштабируемостью и культурой непрерывного обучения — это ключ к успешному созданию и эксплуатации LLM‑приложений. 
Для углубления держите под рукой проверенные источники: A Guide to LLMOps (WhyLabs) с материалами по промптам, оценке, тестированию и масштабированию; Weights & Biases: Understanding LLMOps — обзор открытых и проприетарных LLM с практиками мониторинга; и DataRobot AI Wiki, рассматривающий LLMOps как подмножество MLOps и смежные темы. 12 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-1/Answers 1.3.md: -------------------------------------------------------------------------------- 1 | # Answers 1.3 2 | 3 | ## Theory 4 | 5 | 1. Integration of the OpenAI Moderation API: obtain an API key, add a client library on the backend, and insert moderation into the content submission pipeline so all data is analyzed before publication. 6 | 2. Customization: tune sensitivity, focus on specific violations, and use your own allow/deny lists in line with community standards and compliance requirements. 7 | 3. Extending moderation: beyond text, add checks for images and video (using OpenAI tools or third‑party solutions) for comprehensive protection. 8 | 4. Delimiters reduce prompt‑injection risk by separating user input from system instructions and preserving command integrity. 9 | 5. Isolating commands with delimiters clearly separates executable instructions from user data, preventing injection of malicious directives. 10 | 6. Additional measures: strict input validation, least‑privilege design, allow‑lists, regular expressions, monitoring, and logging to detect anomalies. 11 | 7. Direct assessment: ask the model to classify input as an injection attempt or not — this reduces false positives and improves response accuracy. 12 | 8. Response measures: notify and educate users, ask them to rephrase, isolate suspicious content for human review, and dynamically adjust sensitivity. 13 | 9. Pros and cons of direct assessment: accuracy and adaptability versus development/maintenance complexity and the need to balance security with UX. 14 | 10. Combining the Moderation API with anti‑injection strategies significantly improves the safety and integrity of UGC platforms. 15 | 16 | ## Practice (sketches) 17 | 18 | 1. Moderate a single text fragment: 19 | ```python 20 | from openai import OpenAI 21 | 22 | client = OpenAI() 23 | 24 | def moderate_content(content: str) -> bool: 25 | resp = client.moderations.create(model="omni-moderation-latest", input=content) 26 | return bool(resp.results[0].flagged) 27 | ``` 28 | 29 | 2. Remove a delimiter from a string: 30 | ```python 31 | def sanitize_delimiter(input_text: str, delimiter: str) -> str: 32 | return input_text.replace(delimiter, "") 33 | ``` 34 | 35 | 3. Check input length: 36 | ```python 37 | def validate_input_length(input_text: str, min_length=1, max_length=200) -> bool: 38 | return min_length <= len(input_text) <= max_length 39 | ``` 40 | 41 | 4. 
User session with simple heuristics: 42 | ```python 43 | class UserSession: 44 |     def __init__(self, user_id: int): 45 |         self.user_id = user_id 46 |         self.trust_level = 0 47 |         self.sensitivity_level = 5 48 | 49 |     def adjust_sensitivity(self): 50 |         if self.trust_level > 5: 51 |             self.sensitivity_level = max(1, self.sensitivity_level - 1) 52 |         else: 53 |             self.sensitivity_level = min(10, self.sensitivity_level + 1) 54 | 55 |     def evaluate_input(self, user_input: str) -> bool: 56 |         dangerous_keywords = ["exec", "delete", "drop"] 57 |         return any(k in user_input.lower() for k in dangerous_keywords) 58 | 59 |     def handle_input(self, user_input: str): 60 |         if self.evaluate_input(user_input): 61 |             if self.trust_level < 5: 62 |                 print("Input flagged and sent for security review.") 63 |             else: 64 |                 print("The request looks suspicious. Please clarify or rephrase.") 65 |         else: 66 |             print("Input accepted. Thank you!") 67 |         print("Remember: input should be clear and free of potentially dangerous commands.") 68 | ``` 69 | 70 | 5. Direct assessment for injection (stub logic): 71 | ```python 72 | def direct_evaluation_for_injection(user_input: str) -> str: 73 |     if "ignore instructions" in user_input.lower() or "disregard previous guidelines" in user_input.lower(): 74 |         return 'Y' 75 |     return 'N' 76 | ``` 77 | 78 | 6. Example integration in a main loop: 79 | ```python 80 | if __name__ == "__main__": 81 |     session = UserSession(user_id=1) 82 |     while True: 83 |         text = input("Enter text (or 'exit'): ") 84 |         if text.lower() == 'exit': 85 |             break 86 | 87 |         text = sanitize_delimiter(text, "####") 88 |         if not validate_input_length(text): 89 |             print("Input too short/long.") 90 |             continue 91 | 92 |         if moderate_content(text): 93 |             print("Content flagged as unacceptable. Please revise.") 94 |             continue 95 | 96 |         if direct_evaluation_for_injection(text) == 'Y': 97 |             print("Potential injection detected. Please rephrase.") 98 |             continue 99 | 100 |         session.handle_input(text) 101 | ``` 102 | 103 | -------------------------------------------------------------------------------- /docs/CHAPTER-1/Answers 1.6.md: -------------------------------------------------------------------------------- 1 | # Answers 1.6 2 | 3 | ## Theory 4 | 5 | 1. Evaluating the outputs of LLM applications is significant for understanding their effectiveness, ensuring they meet intended objectives, and improving future performance. The outputs should be assessed across dimensions of accuracy, relevance, and completeness to ensure they align with the application's goals. 6 | 7 | 2. Developing robust performance metrics is crucial for quantitatively assessing how well an LLM application meets its objectives. Examples of such metrics include precision, recall, F1 score, and user satisfaction ratings. These metrics guide ongoing development and inform decisions about the application's deployment. 8 | 9 | 3. The process of transitioning LLM applications from development to deployment is iterative, involving initial prototyping with simple prompts, identifying deficiencies, and gradually increasing complexity. This process balances development effort with application performance, emphasizing efficiency over perfection. 10 | 11 | 4. Rigorous evaluation is critical for high-stakes LLM applications, such as those in healthcare, legal advice, or financial planning, where the consequences of erroneous outputs can be severe. In these contexts, evaluation must be thorough, including extensive testing and bias mitigation, to ensure reliability and ethical integrity.
12 | 13 | 5. Best practices for developing and deploying LLM applications include starting small with a modular approach, iterating rapidly to refine the application, and automating testing for efficiency. These practices ensure a solid foundation and facilitate continuous improvement. 14 | 15 | 6. Automating testing streamlines the evaluation process, identifies discrepancies and errors precisely, and integrates continuous testing into the development pipeline. This automation maintains a constant feedback loop for ongoing improvement. 16 | 17 | 7. Customizing evaluation metrics and adjusting the evaluation rigor are important to reflect the application's objectives and the impact of potential errors. High-stakes applications require more stringent testing protocols to ensure safety and reliability. 18 | 19 | 8. Developing a comprehensive evaluation framework for LLM outputs involves creating a detailed rubric for consistent assessment, structuring systematic evaluation protocols, and using expert comparisons to benchmark quality. This framework ensures objective and thorough evaluation. 20 | 21 | 9. Advanced evaluation techniques, such as semantic similarity assessments and crowdsourced evaluation, address the multifaceted nature of LLM output evaluation. These techniques provide a granular assessment of performance and contribute to the improvement of LLM applications. 22 | 23 | 10. Continuous evaluation and diverse test cases enhance the reliability and relevance of LLM applications by ensuring they remain effective across various scenarios and user groups. Continuous feedback and version tracking facilitate adaptation and refinement, improving application quality over time. 24 | 25 | ## Practice 26 | 1. 27 | 28 | ```python 29 | def evaluate_response(response, rubric): 30 | """ 31 | Evaluates an LLM response against a detailed rubric. 32 | 33 | Args: 34 | response (str): The LLM-generated response to evaluate. 35 | rubric (dict): A dictionary containing the criteria and their respective weights. 36 | 37 | Returns: 38 | dict: A dictionary containing the score and feedback for each criterion. 39 | """ 40 | # Initialize the results dictionary 41 | results = {} 42 | total_weight = sum(rubric[criteria]['weight'] for criteria in rubric) 43 | total_score = 0 44 | 45 | # Example evaluation logic (to be customized based on actual rubric and response evaluation) 46 | for criteria, details in rubric.items(): 47 | # Placeholder for the actual evaluation logic 48 | score = details['weight'] # Example: Using the weight as the score 49 | feedback = f"Placeholder feedback for {criteria}." 50 | 51 | results[criteria] = {'score': score, 'feedback': feedback} 52 | total_score += score * details['weight'] 53 | 54 | # Calculate the weighted average score 55 | weighted_average_score = total_score / total_weight 56 | 57 | results['overall'] = {'weighted_average_score': weighted_average_score, 'feedback': "Overall feedback based on the rubric."} 58 | 59 | return results 60 | 61 | # Example usage 62 | # rubric = { 63 | # 'accuracy': {'weight': 3}, 64 | # 'relevance': {'weight': 2}, 65 | # 'completeness': {'weight': 3}, 66 | # 'coherence': {'weight': 2} 67 | # } 68 | # response = "Paris is the capital of France." 
69 | # evaluation_results = evaluate_response(response, rubric) 70 | # print(evaluation_results) 71 | ``` -------------------------------------------------------------------------------- /docs/ru/CHAPTER-1/Ответы 1.3.md: -------------------------------------------------------------------------------- 1 | # Ответы 1.3 2 | 3 | ## Теория 4 | 5 | 1. Интеграция OpenAI Moderation API: получите API-ключ, подключите клиентскую библиотеку на бэкенде и встройте проверку в цепочку подачи контента, чтобы анализировать все данные до публикации. 6 | 2. Кастомизация: настройте чувствительность, сфокусируйтесь на определённых нарушениях, используйте собственные списки разрешённых/запрещённых элементов (allow/deny-листы) в соответствии со стандартами сообщества и требованиями комплаенса. 7 | 3. Расширение модерации: помимо текста, добавьте проверку изображений и видео (используя инструменты OpenAI или сторонние решения) для обеспечения комплексной защиты. 8 | 4. Разделители (delimiters) снижают риск prompt-инъекций, отделяя ввод пользователя от системных инструкций и сохраняя целостность команд. 9 | 5. Изоляция команд с помощью разделителей чётко отделяет исполняемые команды от пользовательских данных и предотвращает внедрение вредоносных инструкций. 10 | 6. Дополнительные меры: строгая валидация ввода, принцип наименьших привилегий, allow-листы, регулярные выражения, мониторинг и логирование для выявления аномалий. 11 | 7. Прямая оценка: попросите модель классифицировать ввод как попытку инъекции или нет — это снижает количество ложных срабатываний и повышает точность реакции. 12 | 8. Ответные меры: оповещение и обучение пользователей, просьба переформулировать запрос, изоляция подозрительного контента и его ревью человеком, а также динамическая настройка чувствительности системы. 13 | 9. Плюсы и минусы прямой оценки: точность и адаптивность против сложности разработки и поддержки, а также необходимости баланса между безопасностью и удобством использования (UX). 14 | 10. Совмещение Moderation API и стратегий против инъекций существенно повышает безопасность и целостность платформ, работающих с пользовательским контентом (UGC). 15 | 16 | ## Практика (эскизы) 17 | 18 | 1. Функция модерации одного фрагмента текста: 19 | ```python 20 | from openai import OpenAI 21 | 22 | client = OpenAI() 23 | 24 | def moderate_content(content: str) -> bool: 25 | resp = client.moderations.create(model="omni-moderation-latest", input=content) 26 | return bool(resp.results[0].flagged) 27 | ``` 28 | 29 | 2. Удаление разделителя из строки: 30 | ```python 31 | def sanitize_delimiter(input_text: str, delimiter: str) -> str: 32 | return input_text.replace(delimiter, "") 33 | ``` 34 | 35 | 3. Проверка длины ввода: 36 | ```python 37 | def validate_input_length(input_text: str, min_length=1, max_length=200) -> bool: 38 | return min_length <= len(input_text) <= max_length 39 | ``` 40 | 41 | 4. 
Сессия пользователя с простыми эвристиками: 42 | ```python 43 | class UserSession: 44 | def __init__(self, user_id: int): 45 | self.user_id = user_id 46 | self.trust_level = 0 47 | self.sensitivity_level = 5 48 | 49 | def adjust_sensitivity(self): 50 | if self.trust_level > 5: 51 | self.sensitivity_level = max(1, self.sensitivity_level - 1) 52 | else: 53 | self.sensitivity_level = min(10, self.sensitivity_level + 1) 54 | 55 | def evaluate_input(self, user_input: str) -> bool: 56 | dangerous_keywords = ["exec", "delete", "drop"] 57 | return any(k in user_input.lower() for k in dangerous_keywords) 58 | 59 | def handle_input(self, user_input: str): 60 | if self.evaluate_input(user_input): 61 | if self.trust_level < 5: 62 | print("Ввод помечен и отправлен на проверку безопасностью.") 63 | else: 64 | print("Запрос выглядит подозрительно. Уточните или переформулируйте, пожалуйста.") 65 | else: 66 | print("Ввод принят. Спасибо!") 67 | print("Помните: ввод должен быть ясным и без потенциально опасных команд.") 68 | ``` 69 | 70 | 5. Прямая оценка на предмет инъекций (фиктивная логика): 71 | ```python 72 | def direct_evaluation_for_injection(user_input: str) -> str: 73 | if "ignore instructions" in user_input.lower() or "disregard previous guidelines" in user_input.lower(): 74 | return 'Y' 75 | return 'N' 76 | ``` 77 | 78 | 6. Пример интеграции в основной цикл: 79 | ```python 80 | if __name__ == "__main__": 81 | session = UserSession(user_id=1) 82 | while True: 83 | text = input("Введите текст (или 'exit'): ") 84 | if text.lower() == 'exit': 85 | break 86 | 87 | text = sanitize_delimiter(text, "####") 88 | if not validate_input_length(text): 89 | print("Ввод слишком короткий/длинный.") 90 | continue 91 | 92 | if moderate_content(text): 93 | print("Контент помечен как неприемлемый. Измените формулировку.") 94 | continue 95 | 96 | if direct_evaluation_for_injection(text) == 'Y': 97 | print("Обнаружена потенциальная инъекция. Переформулируйте, пожалуйста.") 98 | continue 99 | 100 | session.handle_input(text) 101 | ``` 102 | 103 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-2/Answers 2.5.md: -------------------------------------------------------------------------------- 1 | # Answers 2.5 2 | 3 | ## Theory 4 | 1. Maximum Marginal Relevance (MMR): balances relevance and diversity by selecting documents close to the query yet dissimilar to each other. 5 | 2. Self‑Query Retrieval: splits a query into semantic content and metadata constraints for precise content‑plus‑attribute retrieval. 6 | 3. Contextual compression: extracts only the most relevant segments from documents, reducing noise and improving answer quality. 7 | 4. Environment setup: install libraries, configure API access (for embeddings), and initialize a vector store — the foundation for advanced retrieval. 8 | 5. Vector stores: hold embeddings and power fast similarity search. 9 | 6. Populating the store: add texts and run similarity search; MMR helps eliminate redundancy. 10 | 7. Boosting diversity with MMR: reduces clustering of near‑duplicates and broadens coverage. 11 | 8. Metadata for specificity: attributes (e.g., date, type) improve precision and relevance. 12 | 9. Self‑Query Retriever: automatically extracts both semantic and metadata parts from user input. 13 | 10. Benefits of contextual compression: saves computation and focuses on the essential. 14 | 11. Best practices: tune MMR, leverage metadata wisely, configure compression carefully, and prepare documents thoroughly. 15 | 12. 
Combining methods: embedding‑based retrieval excels at meaning, while TF‑IDF or SVM can help for keyword or classification‑based scenarios. 16 | 13. Advantages of advanced techniques: improved precision, diversity, context, and overall UX. 17 | 14. NLP outlook: continued progress will yield even smarter handling of complex queries. 18 | 19 | ## Practical Tasks 20 | 21 | 1. 22 | ```python 23 | from typing import List 24 | import numpy as np 25 | 26 | def openai_embedding(text: str) -> List[float]: 27 | # Placeholder: return a random vector instead of calling OpenAI. 28 | return np.random.rand(768).tolist() 29 | 30 | def cosine_similarity(vec1: List[float], vec2: List[float]) -> float: 31 | v1 = np.array(vec1); v2 = np.array(vec2) 32 | return float(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))) 33 | 34 | class VectorDatabase: 35 | def __init__(self, persist_directory: str): 36 | self.persist_directory = persist_directory 37 | self.database = [] # (text, embedding) 38 | 39 | def add_text(self, text: str): 40 | self.database.append((text, openai_embedding(text))) 41 | 42 | def similarity_search(self, query: str, k: int) -> List[str]: 43 | q = openai_embedding(query) 44 | scored = [(t, cosine_similarity(q, e)) for t, e in self.database] 45 | return [t for t, _ in sorted(scored, key=lambda x: x[1], reverse=True)[:k]] 46 | 47 | if __name__ == "__main__": 48 | db = VectorDatabase("path/to/persist") 49 | db.add_text("The quick brown fox jumps over the lazy dog.") 50 | db.add_text("Lorem ipsum dolor sit amet, consectetur adipiscing elit.") 51 | db.add_text("Python is a popular programming language for data science.") 52 | print("Similarity results:", db.similarity_search("Programming in Python", 2)) 53 | ``` 54 | 55 | 2. 56 | ```python 57 | def compress_segment(segment: str, query: str) -> str: 58 | # Placeholder: return half the segment. 59 | return segment[:len(segment)//2] 60 | 61 | def compress_document(document: List[str], query: str) -> List[str]: 62 | return [compress_segment(s, query) for s in document] 63 | 64 | doc = [ 65 | "The first chapter introduces the concepts of machine learning.", 66 | "Machine learning techniques are varied and serve different purposes.", 67 | "In data analysis, regression models can predict continuous outcomes.", 68 | ] 69 | print("Compressed:", compress_document(doc, "machine learning")) 70 | ``` 71 | 72 | 3. 73 | ```python 74 | def similarity(doc_id: str, query: str) -> float: return 0.5 75 | def diversity(doc_id1: str, doc_id2: str) -> float: return 0.5 76 | 77 | def max_marginal_relevance(doc_ids: List[str], query: str, lambda_param: float, k: int) -> List[str]: 78 | selected, remaining = [], doc_ids.copy() 79 | while len(selected) < k and remaining: 80 | scores = { 81 | d: lambda_param * similarity(d, query) - (1 - lambda_param) * max([diversity(d, s) for s in selected] or [0]) 82 | for d in remaining 83 | } 84 | nxt = max(scores, key=scores.get) 85 | selected.append(nxt) 86 | remaining.remove(nxt) 87 | return selected 88 | 89 | print(max_marginal_relevance(["d1","d2","d3"], "query", 0.7, 2)) 90 | ``` 91 | 92 | 93 | 4. 94 | ```python 95 | def initialize_vector_db(): 96 | # Initialize the vector DB using the VectorDatabase class defined above 97 | vector_db = VectorDatabase("path/to/persist/directory") 98 | 99 | # Sample texts to add 100 | texts = [ 101 | "The quick brown fox jumps over the lazy dog.", 102 | "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", 103 | "Python is a popular programming language for data science." 
104 | ] 105 | 106 | for text in texts: 107 | vector_db.add_text(text) 108 | 109 | # Similarity search 110 | query = "data science" 111 | similar_texts = vector_db.similarity_search(query, 2) 112 | print("Similarity search results:", similar_texts) 113 | 114 | # Placeholder for “diverse search” demonstration — call MMR or similar here in a real setup 115 | print("Diverse search (simulated):", similar_texts) 116 | 117 | # Run the demonstration 118 | initialize_vector_db() 119 | ``` 120 | -------------------------------------------------------------------------------- /docs/CHAPTER-2/2.1 Introduction.md: -------------------------------------------------------------------------------- 1 | # 2.1 Introduction 2 | 3 | LangChain stands as a pioneering open-source framework uniquely designed to bridge the gap between Large Language Models (LLMs) like ChatGPT and the vast reservoirs of proprietary or personal data that remain untapped by traditional search engines. By enabling direct conversational interfaces with documents, LangChain opens up new avenues for extracting insights and deriving answers from content that is either not available on the internet or was created after the LLM's last training update. This innovative framework, the brainchild of Harrison Chase, who co-founded LangChain and serves as its CEO, marks a pivotal leap forward in how both organizations and individuals can harness the full potential of their data. 4 | 5 | The essence of LangChain lies in its ability to democratize access to information, transforming raw data into a dialog-driven treasure trove of knowledge. Whether it's sifting through internal reports, research papers, or personal notes, LangChain equips users with a powerful tool to query their documents as if they were engaging in a conversation with a well-informed assistant. This approach not only makes data more accessible but also significantly enhances the efficiency and effectiveness of information retrieval and analysis. 6 | 7 | ## Core Components of LangChain 8 | 9 | At the heart of LangChain's revolutionary approach are its core components, each meticulously designed to serve a specific purpose within the ecosystem. Together, these components form a robust architecture that supports the development and deployment of customized LLM applications. Here's a closer look at each component: 10 | 11 | - **Prompts:** Prompts act as the initial touchpoint between the user and the system, crafted to guide the LLM towards generating responses that are both relevant and contextually accurate. These customizable text inputs are crucial for narrowing down the vast possibilities of language generation to meet specific user needs. 12 | 13 | - **Models:** The cornerstone of LangChain, the Models are sophisticated LLMs trained on extensive datasets to emulate human-like text comprehension and generation. These models are adept at parsing complex queries, understanding nuanced contexts, and crafting responses that mirror human conversation. 14 | 15 | - **Indexes:** Indexes are meticulously organized structures that catalog data for swift and efficient retrieval. They are the backbone of LangChain's ability to quickly sift through large volumes of information, ensuring that the system can pull relevant data points in response to user queries without significant delays. 16 | 17 | - **Chains:** Chains represent the sequential processing steps that raw data undergoes to be transformed into actionable insights. 
These chains can include a variety of processes, such as data cleansing, context analysis, and response formulation, each tailored to refine the interaction between the user and the data. 18 | 19 | - **Agents:** Agents are autonomous entities within the LangChain framework that orchestrate the interaction between its various components. They manage the flow of information, ensure the integrity of data processing, and adapt responses based on user feedback and interaction patterns. 20 | 21 | By harmonizing these components, LangChain provides a flexible and powerful platform for creating data interaction applications that are not only intuitive but also highly adaptive to specific user requirements. This modular design ensures that organizations and individuals can tailor the system to fit their unique data landscapes, making it a versatile tool for a wide range of use cases. 22 | 23 | ## LangChain Capabilities 24 | 25 | **Loading Data with Document Loaders** 26 | 27 | The initial step in leveraging LangChain involves using document loaders to import data from various sources. This process is crucial for ensuring that the framework has access to the most relevant and up-to-date information. Document loaders are designed to be versatile, supporting a wide range of data types and sources. 28 | 29 | **Pre-processing Documents** 30 | 31 | Once data is loaded, it must be pre-processed by splitting documents into semantically meaningful chunks. This step, although seemingly straightforward, requires careful consideration of the nuances involved in text segmentation to maintain the context and integrity of the information. 32 | 33 | **Implementing Semantic Search** 34 | 35 | Semantic search is introduced as a fundamental method for retrieving information in response to user queries. It represents the simplest approach to begin interacting with data. However, limitations exist, and the guide will explore common scenarios where semantic search may fall short and how to address these challenges. 36 | 37 | **Enhancing Responses with Memory** 38 | 39 | To create a chatbot that offers a dynamic and interactive experience, it is essential to incorporate a memory component. This allows the chatbot to maintain context across interactions, providing responses that reflect a continuous conversation rather than isolated exchanges. The guide will detail how to integrate memory into LangChain applications, enabling the development of fully functional chatbots capable of engaging in meaningful dialogue with users. 40 | 41 | ## Further Resources 42 | 43 | For those seeking to deepen their understanding of LangChain or explore more advanced topics, the guide recommends additional resources, including online tutorials, community forums, and the initial course on LangChain for LLM application development. These resources provide valuable support for both new and experienced users of the framework. 44 | -------------------------------------------------------------------------------- /docs/CHAPTER-3/3.4 Summary and Reflections.md: -------------------------------------------------------------------------------- 1 | # 3.4 Summary and Reflections 2 | 3 | Throughout the chapters, we embarked on a comprehensive exploration of integrating large language models (LLMs) into the development process, leveraging Kubeflow Pipelines for efficient ML workflows, and implementing an AI-powered quiz generation mechanism. 
This journey provided a deep dive into the intricacies of LLM-based application development, the automation of machine learning workflows, and the practical application of AI in educational content generation. 4 | 5 | ## Reflections on LLM-Based Development 6 | 7 | LLM-based development marks a significant advancement in creating intelligent applications that can understand and generate human-like text. The key takeaway from this exploration is the critical importance of a structured approach to LLM Ops, which encompasses model selection, preparation, deployment, monitoring, and maintenance. By embracing automation, developers can streamline development cycles, enabling smoother updates and migrations. Moreover, effective prompt management emerged as a pivotal element in enhancing the performance of LLM-based applications, highlighting the necessity for dynamic prompt adjustment and the testing of different prompts. 8 | 9 | ## Insights on Mastering LLM Workflows with Kubeflow Pipelines 10 | 11 | The integration of Kubeflow Pipelines offers a powerful framework for orchestrating and automating machine learning workflows. This tool significantly enhances the efficiency and reliability of machine learning projects by enabling data scientists and developers to define, deploy, and manage complex workflows with ease. The utilization of Kubeflow Pipelines for automating tasks such as supervised tuning pipelines for foundation models like PaLM 2 underscores the versatility and efficiency of this approach in managing large, complex models. 12 | 13 | ## Implementing the AI Quiz Generation Mechanism 14 | 15 | The creation of an AI-powered quiz generator served as a practical demonstration of applying AI models to generate educational content. Through the careful preparation of the environment, dataset creation, prompt engineering, and the use of Langchain for structuring prompts, we successfully implemented a system capable of generating customized quizzes. This project highlighted the potential of AI in educational technology, providing a template for developing interactive learning tools that can adapt to various subjects and user preferences. 16 | 17 | ## Final Thoughts 18 | 19 | The chapters collectively underscore the transformative potential of LLMs and machine learning workflows in reshaping software development and application functionality. By adopting best practices in LLM Ops, leveraging tools like Kubeflow Pipelines for workflow automation, and exploring practical applications such as AI-powered quiz generators, developers and organizations can harness the power of AI to innovate and deliver value. 20 | 21 | Moreover, the journey through these chapters emphasizes the importance of continuous learning, adaptation, and the application of ethical considerations in AI development. As the field of AI continues to evolve, staying informed and engaged with the community will be crucial for navigating future challenges and opportunities. 22 | 23 | In conclusion, this exploration serves as a foundation for further innovation in AI application development, offering insights, strategies, and inspiration for leveraging the latest advancements in AI and machine learning to solve real-world problems. 24 | 25 | 26 | 27 | ## Further Reading and Resources 28 | 29 | To deepen your understanding of the topics covered and explore more advanced concepts in LLM-based development, Kubeflow Pipelines, and AI-powered applications, consider the following resources: 30 | 31 | 1. 
**Large Language Models and Their Applications**: 32 | - [Hugging Face's Transformers Library](https://huggingface.co/transformers/): A comprehensive library for employing state-of-the-art transformer models including GPT, BERT, and others in Python. 33 | 34 | 2. **Machine Learning Operations (MLOps)**: 35 | - [Introducing MLOps by Mark Treveil and others (O'Reilly)](https://www.oreilly.com/library/view/introducing-mlops/9781492083283/): A book providing an overview of MLOps principles, practices, and implementation strategies. 36 | - [Google Cloud MLOps (Machine Learning Operations) Fundamentals](https://www.coursera.org/learn/mlops-fundamentals): A Coursera course covering the basics of MLOps including building, deploying, and continuously improving machine learning models. 37 | 38 | 3. **Kubeflow and Kubeflow Pipelines**: 39 | - [Kubeflow Documentation](https://www.kubeflow.org/docs/started/introduction/): Official documentation for Kubeflow, offering guides, tutorials, and API references. 40 | - [Automating Machine Learning Pipelines with Kubeflow](https://towardsdatascience.com/automating-machine-learning-pipelines-with-kubeflow-342fb3e7bbd8): An article explaining how to automate ML workflows using Kubeflow. 41 | 42 | 4. **AI in Education**: 43 | - [AI and Education: Guidance for Policy Makers](https://unesdoc.unesco.org/ark:/48223/pf0000374266): A UNESCO report discussing the implications and potential of AI in education. 44 | - [Artificial Intelligence in Education: Challenges and Opportunities for Sustainable Development](https://link.springer.com/book/10.1007/978-3-030-52240-7): A book exploring the intersection of AI and education, focusing on sustainable development. 45 | 46 | 5. **Ethical Considerations in AI**: 47 | - [Ethics in Artificial Intelligence and Machine Learning](https://www.ibm.com/cloud/learn/ethics-in-ai): IBM's guide on the importance of ethics in AI development. 48 | - [The Algorithmic Justice League](https://www.ajl.org/): An organization that combines art and research to illuminate the social implications and harms of AI. 49 | 50 | 6. **Interactive Learning and Quiz Generation**: 51 | - [Creating Educational Quizzes with AI: Opportunities and Challenges](https://www.edutopia.org/article/creating-educational-quizzes-ai-opportunities-and-challenges): An Edutopia article discussing how AI can transform quiz generation in educational settings. 52 | - [Quizlet](https://quizlet.com/): An online tool that demonstrates the potential of interactive learning and quiz generation for educational purposes. -------------------------------------------------------------------------------- /docs/en/CHAPTER-1/1.4 Advanced Machine Reasoning.md: -------------------------------------------------------------------------------- 1 | # 1.4 Advanced Machine Reasoning: Strategies 2 | 3 | Advanced machine reasoning brings together a set of practices that help language models solve complex tasks more reliably and transparently. Chain of Thought (CoT) encourages step‑by‑step solutions, breaking the problem into logical stages. This approach improves accuracy and makes the reasoning auditable: the user can see how the model arrived at the answer, which is especially helpful for multi‑constraint tasks, comparative analysis, and calculations. In education, CoT mimics a tutor who guides you through each step rather than giving a finished answer. 
In customer support, it helps unpack complicated requests: clarify details, check assumptions, fix misunderstandings, and provide a correct conclusion. 4 | 5 | In parallel with CoT, teams often use Inner Monologue, where intermediate reasoning is hidden and only the result (or a minimal slice of logic) is shown. This is appropriate when exposing internal steps could harm learning (avoiding “spoilers”), when sensitive information is involved, or when extra details would degrade the user experience. 6 | 7 | To make the examples reproducible, start by preparing the environment and API client. 8 | 9 | ```python 10 | # Import libraries and load keys 11 | import os 12 | from openai import OpenAI 13 | from dotenv import load_dotenv, find_dotenv 14 | 15 | load_dotenv(find_dotenv()) 16 | client = OpenAI() 17 | ``` 18 | 19 | ```python 20 | def get_response_for_queries(query_prompts, 21 |                              model_name="gpt-4o-mini", 22 |                              response_temperature=0, 23 |                              max_response_tokens=500): 24 |     """ 25 |     Returns the model response based on a list of messages (system/user...). 26 |     """ 27 |     model_response = client.chat.completions.create( 28 |         model=model_name, 29 |         messages=query_prompts, 30 |         temperature=response_temperature, 31 |         max_tokens=max_response_tokens, 32 |     ) 33 |     return model_response.choices[0].message.content  # attribute access on the v1 client response 34 | ``` 35 | 36 | With the request wrapper in place, we can move on to CoT prompting, where the reasoning is structured into steps separated by a special delimiter. The system message describes the analysis stages, and the user input is wrapped in delimiters, which simplifies parsing and later post‑processing. 37 | 38 | ```python 39 | step_delimiter = "####" 40 | 41 | system_prompt = f""" 42 | Follow the steps, separating them with the '{step_delimiter}' marker. 43 | 44 | Step 1:{step_delimiter} Check whether the question is about a specific product (not a category). 45 | 46 | Step 2:{step_delimiter} If yes, match it to the product list (brand, specs, price). 47 | 48 | [Insert your product list here] 49 | 50 | Step 3:{step_delimiter} Identify the user’s assumptions (comparisons/specifications). 51 | 52 | Step 4:{step_delimiter} Verify those assumptions against the product data. 53 | 54 | Step 5:{step_delimiter} Correct inaccuracies using only the list and respond politely. 55 | """ 56 | 57 | example_query_1 = "How does the BlueWave Chromebook compare to the TechPro Desktop in terms of cost?" 58 | example_query_2 = "Are televisions available for sale?" 59 | 60 | query_prompts_1 = [ 61 |     {'role': 'system', 'content': system_prompt}, 62 |     {'role': 'user', 'content': f"{step_delimiter}{example_query_1}{step_delimiter}"}, 63 | ] 64 | 65 | query_prompts_2 = [ 66 |     {'role': 'system', 'content': system_prompt}, 67 |     {'role': 'user', 'content': f"{step_delimiter}{example_query_2}{step_delimiter}"}, 68 | ] 69 | ``` 70 | 71 | ```python 72 | response_to_query_1 = get_response_for_queries(query_prompts_1) 73 | print(response_to_query_1) 74 | 75 | response_to_query_2 = get_response_for_queries(query_prompts_2) 76 | print(response_to_query_2) 77 | ``` 78 | 79 | To compare approaches, first print the full answer with intermediate CoT steps, then apply an Inner Monologue variant where only the final portion is shown to the user. If the model returns text with steps separated by `step_delimiter`, you can keep just the final segment — keeping the interface succinct where the “inner workings” aren’t needed. 
80 | 81 | ```python 82 | try: 83 | final_response = response_to_query_2.split(step_delimiter)[-1].strip() 84 | except Exception: 85 | final_response = "Sorry, there was a problem. Please try another question." 86 | 87 | print(final_response) 88 | ``` 89 | 90 | The result is two modes of the same solution: a detailed one, with a visible chain of steps, and a concise one showing only the outcome. Clear prompt design helps in both cases; keep refining your prompts based on observed behavior. When UI clarity matters and extra details are undesirable, prefer Inner Monologue for display while still leveraging internal step‑by‑step analysis for quality control. 91 | 92 | ## Theory Questions 93 | 1. What is Chain of Thought (CoT) and why is it useful for multi‑step tasks? 94 | 2. How does CoT transparency increase user trust in model answers? 95 | 3. How does CoT help in educational scenarios? 96 | 4. How does a reasoning chain improve the quality of support chatbot answers? 97 | 5. What is Inner Monologue, and how does it differ from CoT in terms of what the user sees? 98 | 6. Why is Inner Monologue important when dealing with sensitive information? 99 | 7. How can Inner Monologue help in learning scenarios without revealing “spoilers”? 100 | 8. What steps are needed to prepare the environment for OpenAI API examples? 101 | 9. How is the `get_response_for_queries` function structured? 102 | 10. How does CoT prompting simplify handling complex queries? 103 | 11. How does the system/user prompt structure help answer product questions? 104 | 12. Why is extracting only the final part of the answer useful when using Inner Monologue? 105 | 106 | ## Practical Tasks 107 | 1. Implement `chain_of_thought_prompting(query)`, which generates a system prompt with step structure and wraps the user query in a delimiter. 108 | 2. Write `get_final_response(output, delimiter)` to extract the last part of the answer and handle possible errors. 109 | 3. Create a script that sends two queries — one with CoT and one with Inner Monologue — and prints both responses. 110 | 4. Implement `validate_response_structure(resp, delimiter)` to check that the answer contains the required number of steps. 111 | 5. Build a `QueryProcessor` class that encapsulates CoT and Inner Monologue logic (key loading, prompt assembly, request sending, post‑processing, and error handling). 112 | 113 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-1/Answers 1.1.md: -------------------------------------------------------------------------------- 1 | # Answers 1.1 2 | 3 | ## Theory 4 | 5 | 1. Key benefits of integrating the OpenAI API: generating natural text, automating support, improving content creation, and expanding application functionality with advanced AI — boosting user engagement and operational efficiency. 6 | 2. Obtaining and securing the API key: register on the OpenAI platform, select a plan, and get your key in the dashboard. Store the key in environment variables or a secrets manager; never commit it to a repository — this prevents unauthorized access and potential losses. 7 | 3. `temperature`: controls creativity and variability of generated text. Low values make responses more predictable; higher values increase diversity. Choose based on the task. 8 | 4. Keys should be stored outside code (env vars or secret managers) to avoid leaks through source code and version control systems (VCS). 9 | 5. Model choice influences quality, speed, and cost. 
Balance model capability and resources to fit your app’s requirements. 10 | 6. Response metadata (e.g., token counts in `usage`) helps optimize prompts, manage costs, and use the API more efficiently. 11 | 7. An interactive interface includes dialogue history, input widgets, a send button, and panels to display responses. It updates in real time as answers arrive. 12 | 8. Best practices: post‑processing (style and grammar), personalization to user context, collecting feedback, and monitoring performance and spend. 13 | 9. Pitfalls: over‑trusting model output without checks. Use validation, a mix of automated and manual review, monitoring, and fine‑tuning. 14 | 10. Ethics and privacy: comply with data regulations, be transparent about AI’s role, implement review/correction processes, and consider social impact. 15 | 16 | ## Practice 17 | 18 | Below is a progression of Python scripts for the OpenAI API — from a basic request to error handling and a CLI. 19 | 20 | ### Task 1: Basic API request 21 | 22 | ```python 23 | from openai import OpenAI 24 | 25 | client = OpenAI() 26 | 27 | response = client.chat.completions.create( 28 | model="gpt-4o-mini", 29 | messages=[{"role": "user", "content": "What is the future of AI?"}], 30 | max_tokens=100, 31 | ) 32 | 33 | print(response.choices[0].message.content) 34 | ``` 35 | 36 | ### Task 2: Secure key handling 37 | 38 | ```python 39 | import os 40 | from openai import OpenAI 41 | 42 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 43 | 44 | response = client.chat.completions.create( 45 | model="gpt-4o-mini", 46 | messages=[{"role": "user", "content": "What is the future of AI?"}], 47 | max_tokens=100, 48 | ) 49 | 50 | print(response.choices[0].message.content) 51 | ``` 52 | 53 | ### Task 3: Interpreting the response 54 | 55 | ```python 56 | import os 57 | from openai import OpenAI 58 | 59 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 60 | 61 | response = client.chat.completions.create( 62 | model="gpt-4o-mini", 63 | messages=[{"role": "user", "content": "What is the future of AI?"}], 64 | max_tokens=100, 65 | ) 66 | 67 | print("Response:", response.choices[0].message.content.strip()) 68 | print("Model used:", response.model) 69 | print("Finish reason:", response.choices[0].finish_reason) 70 | ``` 71 | 72 | ### Task 4: Error handling 73 | 74 | ```python 75 | import os 76 | from openai import OpenAI 77 | from openai import APIConnectionError, RateLimitError, APIStatusError 78 | 79 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 80 | 81 | try: 82 | response = client.chat.completions.create( 83 | model="gpt-4o-mini", 84 | messages=[{"role": "user", "content": "What is the future of AI?"}], 85 | max_tokens=100, 86 | ) 87 | print("Response:", response.choices[0].message.content.strip()) 88 | print("Model used:", response.model) 89 | print("Finish reason:", response.choices[0].finish_reason) 90 | except RateLimitError as e: 91 | print(f"Rate limit exceeded: {e}") 92 | except APIConnectionError as e: 93 | print(f"Connection error: {e}") 94 | except APIStatusError as e: 95 | print(f"API returned an error: {e}") 96 | except Exception as e: 97 | print(f"Other error occurred: {e}") 98 | ``` 99 | 100 | ### Task 5: CLI chat without post‑processing 101 | 102 | ```python 103 | from openai import OpenAI 104 | import os 105 | 106 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 107 | 108 | def chat_with_openai(): 109 | print("Starting chat with OpenAI. 
Type 'quit' to exit.") 110 | while True: 111 | user_input = input("You: ") 112 | if user_input.lower() == 'quit': 113 | break 114 | try: 115 | response = client.chat.completions.create( 116 | model="gpt-4o-mini", 117 | messages=[{"role": "user", "content": user_input}], 118 | max_tokens=100, 119 | ) 120 | print("OpenAI:", response.choices[0].message.content.strip()) 121 | except Exception as e: 122 | print(f"Error: {e}") 123 | 124 | if __name__ == "__main__": 125 | chat_with_openai() 126 | ``` 127 | 128 | ### Task 6: Post‑processing 129 | 130 | ```python 131 | from openai import OpenAI 132 | import os 133 | from textblob import TextBlob 134 | 135 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 136 | 137 | def post_process_response(response_text): 138 | blob = TextBlob(response_text) 139 | corrected_text = str(blob.correct()) 140 | formatted_text = " ".join(corrected_text.split()) 141 | return formatted_text 142 | 143 | def chat_with_openai(): 144 | print("Starting chat with OpenAI. Type 'quit' to exit.") 145 | while True: 146 | user_input = input("You: ") 147 | if user_input.lower() == 'quit': 148 | break 149 | try: 150 | response = client.chat.completions.create( 151 | model="gpt-4o-mini", 152 | messages=[{"role": "user", "content": user_input}], 153 | max_tokens=100, 154 | ) 155 | processed = post_process_response(response.choices[0].message.content) 156 | print("OpenAI:", processed) 157 | except Exception as e: 158 | print(f"Other error occurred: {e}") 159 | 160 | if __name__ == "__main__": 161 | chat_with_openai() 162 | ``` 163 | 164 | ### Tasks 7–8 (ideas) 165 | 166 | - Generate a post outline for a user‑provided topic and output a bulleted list. 167 | - Log response time and token usage for each call to a file for later analysis and optimization. 168 | 169 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-2/Answers 2.6.md: -------------------------------------------------------------------------------- 1 | # Answers 2.6 2 | 3 | ## Theory 4 | 1. Three stages of RAG‑QA: accept the query, retrieve relevant documents, and generate the answer. 5 | 2. Context window constraints: because the LLM context is limited, you cannot pass every fragment. MapReduce and Refine help aggregate or iteratively refine information across multiple documents. 6 | 3. Vector database: stores document embeddings and provides fast retrieval of the most relevant documents based on semantic similarity. 7 | 4. RetrievalQA chain: combines retrieval and answer generation, improving relevance and accuracy of results. 8 | 5. MapReduce and Refine: MapReduce quickly produces a summary from many documents; Refine sequentially improves the answer, which is useful when precision is critical. Choose based on the task. 9 | 6. Distributed systems: account for network latency and serialization when operating in distributed setups. 10 | 7. Experimentation: try MapReduce and Refine; effectiveness depends heavily on data types and question styles. 11 | 8. RetrievalQA limitation: no built‑in dialogue memory, which makes maintaining context across follow‑ups difficult. 12 | 9. Dialogue memory: needed to incorporate previous turns and provide contextual answers during longer conversations. 13 | 10. Further study: new LLM approaches, their impact on RAG systems, and memory strategies in RAG chains. 14 | 15 | ## Practical Tasks 16 | 1. 
17 | ```python 18 | from langchain.vectorstores import Chroma 19 | from langchain_openai import OpenAIEmbeddings 20 | 21 | def initialize_vector_database(directory_path): 22 | # Initialize an embeddings generator (OpenAI) to create vector representations for text 23 | embeddings_generator = OpenAIEmbeddings() 24 | 25 | # Initialize a Chroma vector database pointing to a persistence directory 26 | # and the embedding function to use 27 | vector_database = Chroma(persist_directory=directory_path, embedding_function=embeddings_generator) 28 | 29 | # Display current document count to verify initialization 30 | # Assumes Chroma exposes `_collection.count()` 31 | document_count = vector_database._collection.count() 32 | print(f"Documents in VectorDB: {document_count}") 33 | 34 | # Example usage of initialize_vector_database: 35 | documents_storage_directory = 'path/to/your/directory' 36 | initialize_vector_database(documents_storage_directory) 37 | ``` 38 | 39 | 2. 40 | ```python 41 | from langchain.vectorstores import Chroma 42 | from langchain_openai import OpenAIEmbeddings, ChatOpenAI 43 | from langchain.chains import RetrievalQA 44 | from langchain.prompts import PromptTemplate 45 | 46 | def setup_retrieval_qa_chain(model_name, documents_storage_directory): 47 | # Initialize embeddings and Chroma vector store 48 | embeddings_generator = OpenAIEmbeddings() 49 | vector_database = Chroma(persist_directory=documents_storage_directory, embedding_function=embeddings_generator) 50 | 51 | # Initialize the language model (LLM) used in the RetrievalQA chain 52 | language_model = ChatOpenAI(model=model_name, temperature=0) 53 | 54 | # Define a custom prompt template to format LLM inputs 55 | custom_prompt_template = """To better assist with the inquiry, consider the details provided below as your reference... 56 | {context} 57 | Inquiry: {question} 58 | Insightful Response:""" 59 | 60 | # Create the RetrievalQA chain, passing the LLM, a retriever from the vector DB, 61 | # requesting source documents, and using the custom prompt 62 | question_answering_chain = RetrievalQA.from_chain_type( 63 | language_model, 64 | retriever=vector_database.as_retriever(), 65 | return_source_documents=True, 66 | chain_type_kwargs={"prompt": PromptTemplate.from_template(custom_prompt_template)} 67 | ) 68 | 69 | return question_answering_chain 70 | 71 | # Example usage of setup_retrieval_qa_chain: 72 | model_name = "gpt-4o-mini" 73 | documents_storage_directory = 'path/to/your/documents' 74 | qa_chain = setup_retrieval_qa_chain(model_name, documents_storage_directory) 75 | ``` 76 | 77 | 3. 78 | ```python 79 | # Assume setup_retrieval_qa_chain and the imports above are available (same script or imported). 80 | 81 | # Configure to demonstrate both techniques (MapReduce and Refine) 82 | model_name = "gpt-3.5-turbo" 83 | documents_storage_directory = 'path/to/your/documents' 84 | qa_chain = setup_retrieval_qa_chain(model_name, documents_storage_directory) 85 | language_model = ChatOpenAI(model=model_name, temperature=0)  # RetrievalQA does not expose its internal LLM, so create it here 86 | # Create QA chains: one for MapReduce, one for Refine 87 | question_answering_chain_map_reduce = RetrievalQA.from_chain_type( 88 | language_model, 89 | retriever=qa_chain.retriever, 90 | chain_type="map_reduce" # Use MapReduce chain type 91 | ) 92 | 93 | question_answering_chain_refine = RetrievalQA.from_chain_type( 94 | language_model, 95 | retriever=qa_chain.retriever, 96 | chain_type="refine" # Use Refine chain type 97 | ) 98 | 99 | # Example query to test both techniques 100 | query = "What is the importance of probability in machine learning?" 
101 | 102 | # Run MapReduce and print the answer 103 | response_map_reduce = question_answering_chain_map_reduce({"query": query}) 104 | print("MapReduce answer:", response_map_reduce["result"]) 105 | 106 | # Run Refine and print the answer 107 | response_refine = question_answering_chain_refine({"query": query}) 108 | print("Refine answer:", response_refine["result"]) 109 | ``` 110 | 111 | 4. 112 | ```python 113 | def handle_conversational_context(initial_query, follow_up_query, qa_chain): 114 | """ 115 | Simulate handling a follow‑up question in a longer conversation. 116 | 117 | Args: 118 | - initial_query (str): First user query. 119 | - follow_up_query (str): Follow‑up query referring to prior context. 120 | - qa_chain (RetrievalQA): Initialized QA chain that can answer queries. 121 | 122 | Returns: 123 | - None: Prints both answers directly to the console. 124 | """ 125 | # Generate the answer to the initial query 126 | initial_response = qa_chain({"query": initial_query}) 127 | print("Answer to initial query:", initial_response["result"]) 128 | 129 | # Generate the answer to the follow‑up query (note: no dialogue memory) 130 | follow_up_response = qa_chain({"query": follow_up_query}) 131 | print("Answer to follow‑up query:", follow_up_response["result"]) 132 | 133 | # Example usage 134 | a_initial = "Does the curriculum cover probability theory?" 135 | a_follow_up = "Why are those prerequisites important?" 136 | handle_conversational_context(a_initial, a_follow_up, qa_chain) 137 | ``` 138 | -------------------------------------------------------------------------------- /docs/CHAPTER-1/1.7 Summary and Reflections.md: -------------------------------------------------------------------------------- 1 | # 1.7 Summary and Reflections 2 | 3 | ## Mastering the Essentials of Large Language Models 4 | 5 | **Unraveling the Mechanics of LLMs** 6 | 7 | At the heart of Large Language Models (LLMs) lies a sophisticated engine powered by extensive datasets, crafted to emulate human-like text generation. The journey begins with the tokenizer, a fundamental piece of the puzzle, dissecting input text into digestible tokens. Following closely is the model architecture, a complex network designed to forecast the sequence of tokens, drawing from the rich context established by its predecessors. This intricate machinery is not just fascinating; it's the cornerstone for harnessing LLMs across a spectrum of applications, unlocking their full potential. 8 | 9 | **The Critical Role of Tokenization** 10 | 11 | Tokenization is not merely a step in processing; it's a bridge between human input and machine understanding. Grasping its intricacies is paramount for fine-tuning models to produce responses that are not only accurate but also contextually relevant. It's the delicate art of balancing precision with the fluidity of language, ensuring that every token serves its purpose in the grand tapestry of generated text. 12 | 13 | ## Refining Input Evaluation and Processing 14 | 15 | **Upholding Quality and Safety Standards** 16 | 17 | The gatekeeping of quality and safety in applications leveraging LLMs is non-negotiable. It involves a meticulous process of scrutinizing user inputs, weeding out content that could harm or offend, and shaping inputs to align with the model's interpretative capabilities. This vigilant oversight is crucial in preserving the integrity and trustworthiness of LLM-powered applications. 
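To ground this gatekeeping principle in practice, the sketch below screens a user message with the OpenAI moderation endpoint before it is ever sent to a completion call. The helper name, the logging choice, and the sample input are illustrative assumptions rather than code from the chapters above; the client setup mirrors the earlier examples.

```python
# A minimal input-screening sketch, assuming the OpenAI client used in earlier chapters.
# The helper name and rejection policy are illustrative, not a prescribed design.
import os
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def is_input_acceptable(user_text: str) -> bool:
    """Return True when the default moderation model does not flag the text."""
    moderation = client.moderations.create(input=user_text)
    result = moderation.results[0]
    if result.flagged:
        # Record which categories triggered the flag for later review,
        # rather than echoing them back to the user.
        flagged_categories = [
            name for name, value in result.categories.model_dump().items() if value
        ]
        print(f"Input rejected; flagged categories: {flagged_categories}")
        return False
    return True

if __name__ == "__main__":
    sample_input = "How do I reset my account password?"
    if is_input_acceptable(sample_input):
        print("Input passed moderation and can be sent to the model.")
```

In a real application the same check would typically run on the model's output as well, and rejected inputs would be logged for audit rather than silently dropped.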
18 | 19 | **Elevating Problem-solving with Advanced Techniques** 20 | 21 | Empowering LLMs with advanced reasoning techniques, such as chain of thought reasoning and task decomposition, marks a significant leap towards mimicking human cognitive processes. These methodologies enable the model to navigate through complex inquiries with grace, breaking them down into simpler, more digestible pieces, thereby enriching the quality and relevance of its outputs. 22 | 23 | 24 | **Ethical Deployment: A Guiding Principle** 25 | 26 | In the realm of LLMs, technological prowess must go hand in hand with ethical responsibility. Building systems that are not only intelligent but also ethical demands a commitment to transparency, fairness, and respect for privacy. It's about crafting solutions that honor the trust placed in them by users, safeguarding against misuse, and contributing positively to society. 27 | 28 | ## From Theory to Practice: The Journey Ahead 29 | 30 | **Lessons from the Field** 31 | 32 | The inclusion of case studies shines a light on the tangible impacts of LLMs, offering a treasure trove of insights from real-world deployments. These narratives are not just stories; they're beacons guiding developers through the complexities of applying LLMs, illuminating the path from conceptualization to realization. 33 | 34 | **Navigating the Path with Best Practices** 35 | 36 | The culmination of experiences distilled into best practices serves as a compass for aspiring developers. It emphasizes the importance of staying dynamic, with continuous updates to training data, stringent input validation, and active engagement with the AI community. These practices are not just recommendations; they're the building blocks for responsible and innovative development. 37 | 38 | To further explore and understand the practical integration of OpenAI's API into applications, as outlined in your introduction, the following resources are invaluable for professionals looking to enhance their applications with advanced AI functionalities. These resources are curated to provide a deeper understanding of utilizing GPT models for generating text-based responses, managing API interactions securely, and incorporating AI-generated content into various applications effectively. 39 | 40 | ### Further Reading 41 | 1. **OpenAI Documentation**: The official [OpenAI API documentation](https://beta.openai.com/docs/) offers comprehensive details on getting started, API usage, best practices, and security measures. It's a must-read for anyone planning to use the OpenAI API in their applications. 42 | 2. **Environment Variables in Python**: The [Twelve-Factor App methodology](https://12factor.net/config) provides guidelines on managing configuration data, such as API keys, outside your application's code. This principle is crucial for maintaining the security of sensitive information. 43 | 3. **Panel for Python**: Panel's [official documentation](https://panel.holoviz.org/) provides a comprehensive guide to building interactive web applications in Python. It includes examples and tutorials that can help you create a conversational interface for interacting with GPT models. 44 | 4. **Designing Chatbots with Python**: The book "Designing Chatbots with Python" by Sumit Raj dives into the principles of chatbot development, including natural language processing techniques and integration with APIs like OpenAI's, to create responsive and intelligent bots. 45 | 5. 
**Building Smarter Applications with AI**: The O'Reilly book "Building Smarter Applications with AI" by Madison May, Ben Wilson, and O'Reilly Media, available on [O'Reilly's platform](https://www.oreilly.com/), discusses the integration of AI technologies, including GPT models, into applications. It covers topics from model selection and optimization to user experience enhancement. 46 | 6. **AI We Can Actually Use**: Cassie Kozyrkov's [articles on Towards Data Science](https://towardsdatascience.com/@kozyrkov) provide insightful perspectives on applying AI in real-world applications. Her writing focuses on practical aspects of AI implementation, making complex concepts more accessible. 47 | 48 | ## Closing Thoughts 49 | 50 | As we stand on the precipice of new advancements in LLM technology, it's clear that the journey ahead is as promising as it is challenging. The insights garnered from this exploration underscore the importance of foundational knowledge, rigorous evaluation, and ethical consideration in unlocking the full potential of LLMs. By adhering to these principles, we can navigate the complexities of this evolving field, driving forward innovations that are not only technologically advanced but also socially responsible and beneficial to humanity. 51 | 52 | In this era of rapid technological progress, the exploration of LLMs represents a fascinating blend of scientific endeavor and ethical responsibility. As we continue to push the boundaries of what's possible, let us do so with a keen awareness of the impact our creations have on the world, striving always to build systems that enhance human understanding, foster inclusivity, and uphold the highest standards of integrity and respect. 53 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-1/1.4 Продвинутое машинное рассуждение.md: -------------------------------------------------------------------------------- 1 | # 1.4 Продвинутое машинное рассуждение: стратегии 2 | 3 | Продвинутое машинное рассуждение объединяет набор практик, которые помогают языковым моделям решать сложные задачи надёжнее и прозрачнее. Цепочка рассуждений (Chain of Thought, CoT) предлагает вести решение поэтапно, раскладывая проблему на логичные шаги. Такой подход повышает точность и делает ход мысли проверяемым: пользователь видит, как модель пришла к ответу, что особенно полезно в задачах с несколькими условиями, сравнительном анализе и вычислениях. В образовательных сценариях CoT имитирует работу наставника, который ведёт по шагам, а не выдаёт готовый ответ, а в клиентской поддержке позволяет разбирать сложные запросы последовательно — уточнять детали, проверять предположения, исправлять недопонимания и давать корректный итог. Параллельно с CoT нередко применяют технику «внутренний монолог» (Inner Monologue), когда промежуточные рассуждения скрываются от пользователя и показывается только результат или минимально необходимая часть логики. Это уместно, когда раскрытие внутренних шагов может навредить обучению (нежелание давать «спойлеры»), когда речь идёт о конфиденциальной информации, либо когда лишние детали ухудшают пользовательский опыт. 4 | 5 | Для воспроизводимых примеров начнём с подготовки окружения и клиента API. 
6 | 7 | ```python 8 | # Импорт библиотек и загрузка ключей 9 | import os 10 | from openai import OpenAI 11 | from dotenv import load_dotenv, find_dotenv 12 | 13 | load_dotenv(find_dotenv()) 14 | client = OpenAI() 15 | ``` 16 | 17 | ```python 18 | def get_response_for_queries(query_prompts, 19 | model_name="gpt-4o-mini", 20 | response_temperature=0, 21 | max_response_tokens=500): 22 | """ 23 | Возвращает ответ модели на основе списка сообщений (system/user...). 24 | """ 25 | model_response = client.chat.completions.create( 26 | model=model_name, 27 | messages=query_prompts, 28 | temperature=response_temperature, 29 | max_tokens=max_response_tokens, 30 | ) 31 | return model_response.choices[0].message["content"] 32 | ``` 33 | 34 | Далее зафиксируем функцию-обёртку для запросов и перейдём к CoT‑промптингу, где рассуждения структурируются шагами под специальный разделитель. Системное сообщение описывает этапы анализа, а пользовательский ввод заключён в разделители, что упрощает разбор и последующую постобработку. 35 | 36 | ```python 37 | step_delimiter = "####" 38 | 39 | system_prompt = f""" 40 | Следуй шагам, разделяя их маркером '{step_delimiter}'. 41 | 42 | Шаг 1:{step_delimiter} Проверь, идёт ли речь о конкретном продукте (а не о категории). 43 | 44 | Шаг 2:{step_delimiter} Если да, сопоставь его с перечнем товаров (бренд, характеристики, цена). 45 | 46 | [Здесь разместите список товаров] 47 | 48 | Шаг 3:{step_delimiter} Выяви предположения пользователя (сравнения/спецификации). 49 | 50 | Шаг 4:{step_delimiter} Проверь эти предположения по данным о товарах. 51 | 52 | Шаг 5:{step_delimiter} Исправь неточности, опираясь только на список, и ответь вежливо. 53 | """ 54 | 55 | example_query_1 = "How does the BlueWave Chromebook compare to the TechPro Desktop in terms of cost?" 56 | example_query_2 = "Are televisions available for sale?" 57 | 58 | query_prompts_1 = [ 59 | {'role': 'system', 'content': system_prompt}, 60 | {'role': 'user', 'content': f"{step_delimiter}{example_query_1}{step_delimiter}"}, 61 | ] 62 | 63 | query_prompts_2 = [ 64 | {'role': 'system', 'content': system_prompt}, 65 | {'role': 'user', 'content': f"{step_delimiter}{example_query_2}{step_delimiter}"}, 66 | ] 67 | ``` 68 | 69 | ```python 70 | response_to_query_1 = get_response_for_queries(query_prompts_1) 71 | print(response_to_query_1) 72 | 73 | response_to_query_2 = get_response_for_queries(query_prompts_2) 74 | print(response_to_query_2) 75 | ``` 76 | 77 | Чтобы сопоставить подходы, сначала выведем полный ответ с промежуточными шагами CoT, а затем применим вариант Inner Monologue, в котором пользователю показывается только конечная часть. Если модель возвращает текст, где шаги отделены `step_delimiter`, можно оставить лишь заключительный фрагмент — так сохраняется лаконичность интерфейса там, где «внутренняя кухня» не нужна. 78 | 79 | ```python 80 | try: 81 | final_response = response_to_query_2.split(step_delimiter)[-1].strip() 82 | except Exception: 83 | final_response = "Извините, возникла проблема. Попробуйте другой вопрос." 84 | 85 | print(final_response) 86 | ``` 87 | 88 | В результате мы получаем два режима одного решения: подробный, с видимой цепочкой шагов, и краткий, где показывается только итог. В обоих случаях помогает ясная формулировка промптов; их стоит регулярно уточнять с учётом наблюдаемого поведения модели. 
Когда важна чистота интерфейса и отсутствие лишних деталей, целесообразно использовать Inner Monologue и показывать лишь результат, сохраняя при этом возможность внутреннего пошагового анализа для контроля качества. 89 | 90 | ## Теоретические вопросы 91 | 1. Что такое Цепочка рассуждений (Chain of Thought, CoT) и чем она полезна для задач, требующих нескольких шагов? 92 | 2. Как прозрачность CoT повышает доверие пользователей к ответам модели? 93 | 3. Каким образом CoT помогает в образовательных задачах? 94 | 4. Как цепочка рассуждений улучшает качество ответов чат-ботов поддержки? 95 | 5. Что такое «Внутренний монолог» (Inner Monologue) и чем он отличается от CoT по способу подачи информации пользователю? 96 | 6. Почему Inner Monologue важен при работе с конфиденциальной информацией? 97 | 7. Как Inner Monologue помогает в обучающих сценариях, не раскрывая промежуточные «спойлеры»? 98 | 8. Какие шаги необходимы для подготовки окружения под примеры, использующие OpenAI API? 99 | 9. Как устроена функция `get_response_for_queries`? 100 | 10. Как CoT-промптинг облегчает обработку сложных запросов? 101 | 11. Как структура system/user-промптов помогает отвечать по вопросам о товарах? 102 | 12. Чем полезно извлекать только финальную часть ответа при использовании Inner Monologue? 103 | 104 | ## Практические задания 105 | 1. Реализуйте функцию `chain_of_thought_prompting(query)`, которая генерирует системный промпт со структурой шагов и оборачивает запрос пользователя разделителем. 106 | 2. Напишите функцию `get_final_response(output, delimiter)`, извлекающую последнюю часть ответа и обрабатывающую возможные ошибки. 107 | 3. Создайте скрипт: отправьте два запроса — один с CoT, второй с Inner Monologue; распечатайте полученные ответы. 108 | 4. Реализуйте функцию `validate_response_structure(resp, delimiter)`, которая проверяет, что в ответе присутствует необходимое число шагов. 109 | 5. Создайте класс `QueryProcessor`, который инкапсулирует логику CoT и Inner Monologue (загрузка ключей, сборка промптов, отправка запросов, постобработка и обработка ошибок). 110 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-1/Ответы 1.1.md: -------------------------------------------------------------------------------- 1 | # Ответы 1.1 2 | 3 | ## Теория 4 | 5 | 1. Основные преимущества интеграции OpenAI API: генерация естественных текстов, автоматизация поддержки, улучшение создания контента и расширение функциональности приложений за счёт продвинутого ИИ — это повышает вовлечённость пользователей и операционную эффективность. 6 | 2. Получение и защита API-ключа: зарегистрируйтесь на платформе OpenAI, выберите тарифный план и получите ключ в личном кабинете. Храните ключ в переменных окружения или секрет-хранилищах, не коммитьте его в репозиторий — это предотвращает несанкционированный доступ и потенциальные убытки. 7 | 3. Параметр `temperature`: регулирует креативность и вариативность генерируемого текста. Низкое значение приводит к более предсказуемым ответам, высокое — к более разнообразным. Выбирайте значение в зависимости от задачи. 8 | 4. Ключи следует хранить вне кода (в переменных окружения или секрет-менеджерах), чтобы исключить их утечки через исходный код и системы контроля версий (VCS). 9 | 5. Выбор модели влияет на качество, скорость и стоимость. Необходимо балансировать возможности модели и ресурсные ограничения в соответствии с требованиями конкретного приложения. 10 | 6. 
Метаданные ответа (например, количество токенов в секции `usage`) помогают оптимизировать промпты, управлять затратами и эффективнее использовать API. 11 | 7. Интерактивный интерфейс: включает историю диалога, виджеты ввода, кнопку отправки и панели для отображения ответов. Обновление происходит в реальном времени по мере поступления ответа. 12 | 8. Лучшие практики: постобработка (коррекция стиля и грамматики), персонализация под контекст пользователя, сбор обратной связи, мониторинг производительности и расходов. 13 | 9. Подводные камни: избыточное доверие ответам модели без должных проверок. Необходимы валидация, сочетание автоматического и ручного контроля, постоянный мониторинг и тонкая настройка. 14 | 10. Этика и приватность: соблюдайте нормативные требования по работе с данными, прозрачно информируйте пользователей о роли ИИ, внедряйте процессы ревью и коррекции, а также учитывайте социальные последствия. 15 | 16 | ## Практика 17 | 18 | Ниже представлена эволюция скрипта на Python для работы с OpenAI API: от базового запроса до обработки ошибок и использования в CLI. 19 | 20 | ### Задание 1: базовый запрос к API 21 | 22 | ```python 23 | from openai import OpenAI 24 | 25 | client = OpenAI() 26 | 27 | response = client.chat.completions.create( 28 | model="gpt-4o-mini", 29 | messages=[{"role": "user", "content": "What is the future of AI?"}], 30 | max_tokens=100, 31 | ) 32 | 33 | print(response.choices[0].message.content) 34 | ``` 35 | 36 | ### Задание 2: безопасная работа с ключом 37 | 38 | ```python 39 | import os 40 | from openai import OpenAI 41 | 42 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 43 | 44 | response = client.chat.completions.create( 45 | model="gpt-4o-mini", 46 | messages=[{"role": "user", "content": "What is the future of AI?"}], 47 | max_tokens=100, 48 | ) 49 | 50 | print(response.choices[0].message.content) 51 | ``` 52 | 53 | ### Задание 3: интерпретация ответа 54 | 55 | ```python 56 | import os 57 | from openai import OpenAI 58 | 59 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 60 | 61 | response = client.chat.completions.create( 62 | model="gpt-4o-mini", 63 | messages=[{"role": "user", "content": "What is the future of AI?"}], 64 | max_tokens=100, 65 | ) 66 | 67 | print("Response:", response.choices[0].message.content.strip()) 68 | print("Model used:", response.model) 69 | print("Finish reason:", response.choices[0].finish_reason) 70 | ``` 71 | 72 | ### Задание 4: обработка ошибок 73 | 74 | ```python 75 | import os 76 | from openai import OpenAI 77 | from openai import APIConnectionError, RateLimitError, APIStatusError 78 | 79 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 80 | 81 | try: 82 | response = client.chat.completions.create( 83 | model="gpt-4o-mini", 84 | messages=[{"role": "user", "content": "What is the future of AI?"}], 85 | max_tokens=100, 86 | ) 87 | print("Response:", response.choices[0].message.content.strip()) 88 | print("Model used:", response.model) 89 | print("Finish reason:", response.choices[0].finish_reason) 90 | except RateLimitError as e: 91 | print(f"Rate limit exceeded: {e}") 92 | except APIConnectionError as e: 93 | print(f"Connection error: {e}") 94 | except APIStatusError as e: 95 | print(f"API returned an error: {e}") 96 | except Exception as e: 97 | print(f"Other error occurred: {e}") 98 | ``` 99 | 100 | ### Задание 5: CLI-чат без постобработки 101 | 102 | ```python 103 | from openai import OpenAI 104 | import os 105 | 106 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 107 | 108 | def 
chat_with_openai(): 109 | print("Starting chat with OpenAI. Type 'quit' to exit.") 110 | while True: 111 | user_input = input("You: ") 112 | if user_input.lower() == 'quit': 113 | break 114 | try: 115 | response = client.chat.completions.create( 116 | model="gpt-4o-mini", 117 | messages=[{"role": "user", "content": user_input}], 118 | max_tokens=100, 119 | ) 120 | print("OpenAI:", response.choices[0].message.content.strip()) 121 | except Exception as e: 122 | print(f"Error: {e}") 123 | 124 | if __name__ == "__main__": 125 | chat_with_openai() 126 | ``` 127 | 128 | ### Задание 6: постобработка ответа 129 | 130 | ```python 131 | from openai import OpenAI 132 | import os 133 | from textblob import TextBlob 134 | 135 | client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 136 | 137 | def post_process_response(response_text): 138 | blob = TextBlob(response_text) 139 | corrected_text = str(blob.correct()) 140 | formatted_text = " ".join(corrected_text.split()) 141 | return formatted_text 142 | 143 | def chat_with_openai(): 144 | print("Starting chat with OpenAI. Type 'quit' to exit.") 145 | while True: 146 | user_input = input("You: ") 147 | if user_input.lower() == 'quit': 148 | break 149 | try: 150 | response = client.chat.completions.create( 151 | model="gpt-4o-mini", 152 | messages=[{"role": "user", "content": user_input}], 153 | max_tokens=100, 154 | ) 155 | processed = post_process_response(response.choices[0].message.content) 156 | print("OpenAI:", processed) 157 | except Exception as e: 158 | print(f"Other error occurred: {e}") 159 | 160 | if __name__ == "__main__": 161 | chat_with_openai() 162 | ``` 163 | 164 | ### Задание 7–8 (идеи) 165 | 166 | - Генерация структуры поста по теме пользователя с выводом в виде маркированного списка. 167 | - Логирование времени ответа и расхода токенов по каждому вызову в файл для последующего анализа и оптимизации. 168 | 169 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-1/1.2 Classification.md: -------------------------------------------------------------------------------- 1 | # 1.2 Classification 2 | 3 | In classification tasks, we provide the model with clear context and ask it to assign a given text to one of several predefined categories. Message roles in this dialogue are simple and complementary: the system message describes the task and lists the allowed classes, while the user message contains the text fragment to be assigned to one of those classes. This order — system first, then user — establishes an unambiguous context in which the model responds predictably and consistently. 4 | 5 | To lock down the format, let’s start with the smallest example: classify a customer review by sentiment — “Positive”, “Negative”, or “Neutral”. The system message gives a direct instruction, and the user message provides the review text to be evaluated. 6 | 7 | ```python 8 | system_message = """Classify the customer review into one of the categories: Positive, Negative, or Neutral.""" 9 | ``` 10 | 11 | For `user_message`, use the review you want to classify: 12 | ```python 13 | user_message = """I recently bought a product at your store. The purchase went great, and the quality exceeded my expectations!""" 14 | ``` 15 | 16 | This dialogue follows the common Chat Completions API pattern: each message is a structure with `role` and `content` keys. `role` indicates the source (system or user), and `content` carries the text. 
Separating roles lets you initialize the model’s behavior up front and then pass in the specific input. In the simplest case, the system message sets rules and style, and the user message formulates the task. For example, if you want a playful poem about a happy carrot, you might first set the instruction `{'role': 'system', 'content': "You are an assistant who replies in a playful poet’s style."}`, then send `{'role': 'user', 'content': "Write a very short poem about a happy carrot."}`. The very same user request would produce a different tone and format under a different system context, which is why the `system → user` sequence is key to controlling model behavior. 17 | 18 | A complete working example for classifying reviews is built from these same elements; the only difference is that we call the model from code and return the answer: 19 | 20 | ```python 21 | import os 22 | from openai import OpenAI 23 | from dotenv import load_dotenv, find_dotenv 24 | _ = load_dotenv(find_dotenv()) # read local .env 25 | 26 | client = OpenAI() 27 | 28 | def classify(messages, model="gpt-4o-mini", temperature=0, max_tokens=500): 29 | response = client.chat.completions.create( 30 | model=model, 31 | messages=messages, 32 | temperature=temperature, 33 | max_tokens=max_tokens, 34 | ) 35 | return response.choices[0].message.content 36 | 37 | delimiter = "####" 38 | system_message = """Classify the customer review into one of the categories: Positive, Negative, or Neutral.""" 39 | 40 | user_message = """I recently bought a product at your store. The purchase went great, and the quality exceeded my expectations!""" 41 | 42 | messages = [ 43 | {'role': 'system', 'content': system_message}, 44 | {'role': 'user', 'content': f"{delimiter}{user_message}{delimiter}"}, 45 | ] 46 | 47 | response = classify(messages) 48 | print(response) 49 | ``` 50 | 51 | The same principles power other classification scenarios. In email, it’s useful to separate work messages from personal and spam (categories: Work, Personal, Spam). A suitable system message might be: “Classify the following email as Work, Personal, or Spam.” with a sample user message: “Great discounts on our new electronics! 
Click now and save.” For movie review sentiment, distinguish “Positive”, “Negative”, and “Neutral”: the system message could be “Determine the sentiment of the following movie review: Positive, Negative, or Neutral.” and the user message: “Visually stunning, but the plot is predictable and shallow.” For news, classify the topic as Politics, Technology, Sports, or Entertainment: “Determine the topic of the news item: Politics, Technology, Sports, or Entertainment.” with “A new smartphone model uses breakthrough technology that’s reshaping the industry.” 52 | 53 | For product ratings from reviews, star classes work well — 1, 2, 3, 4, or 5: “Based on the review, assign a rating from 1 to 5 stars.” and “The design is interesting, but frequent breakdowns and weak support make it hard to recommend.” When routing customer requests, common intents are Billing, Support, Sales, or General Question — “Identify the intent of the request: Billing, Support, Sales, or General Question.” and “Tell me about available plans and current promotions.” For text genre classification, use categories like Fiction, Non‑fiction, Poetry, News — “Identify the genre of the text: Fiction, Non‑fiction, Poetry, or News.” and “In the heart of the city, among noisy streets, there was a garden untouched by time.” 54 | 55 | On social media, automatic tone assessment is valuable — Serious, Ironic, Inspiring, or Irritated. A fitting system message: “Determine the tone of the following post: Serious, Ironic, Inspiring, or Irritated.” with a user example: “There’s nothing better than starting the day with a smile. Happiness is contagious!” In academic writing, classify the field: Biology, Computer Science, Psychology, or Mathematics — “Identify the field of the following abstract: Biology, Computer Science, Psychology, or Mathematics.” and “This study examines the algorithmic complexity of sorting methods and their efficiency.” In food reviews, you might extract the flavor profile: Sweet, Salty, Sour, Bitter, Umami — “Identify the flavor profile in the review: Sweet, Salty, Sour, Bitter, or Umami.” and “A dish with a perfect balance of umami and a light sweetness that enhances the taste.” Finally, for emergency calls, quickly determine the situation type: Fire, Medical, Crime, or Other — “Identify the emergency type from the call transcript: Fire, Medical, Crime, or Other.” and “The building next door is filled with smoke; we can see flames. Please help urgently!” In all of these cases, the key to quality answers is a clear system message that defines the boundaries and lists the categories; the user message remains a concise carrier of the text to be labeled. 56 | 57 | For each scenario, you can freely change the `user_message` content for the specific case; the important part is keeping the system message concrete and unambiguous about the set of allowed labels. 58 | 59 | ## Theory Questions 60 | 1. What are the key components of a message when working with GPT models (`role` and `content`), and why is it important to distinguish them? 61 | 2. How does the role of `system` messages differ from `user` messages in a dialogue with the AI? 62 | 3. Provide an example of how a `system` message can set the model’s behavior or response style. 63 | 4. How does the `system → user` message sequence influence the model’s answer? 64 | 5. In the review classification example, which categories are used? 65 | 6. Describe a scenario where classifying the sentiment of a movie review is useful. Which categories fit? 66 | 7. 
How does classifying the topic of a news article help with content management or recommendations? Give category options. 67 | 8. Discuss the importance of classifying customer requests in business. Which categories help optimize support? 68 | 9. What is the role of `user_message` in classification tasks, and how should it be structured for accurate results? 69 | 10. How is classifying the tone of social posts useful for moderation or marketing? Provide example categories. 70 | 71 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-1/1.2 Классификация.md: -------------------------------------------------------------------------------- 1 | # 1.2 Классификация 2 | 3 | В задачах классификации мы задаём модели понятный контекст и просим отнести данный текст к одной из заранее определённых категорий. Роли сообщений в таком диалоге просты и взаимодополняемы: системное сообщение описывает постановку задачи и перечисляет допустимые классы, а пользовательское сообщение содержит сам фрагмент текста для отнесения к одному из этих классов. Такой порядок — сначала система, затем пользователь — формирует однозначный контекст, в рамках которого модель отвечает предсказуемо и воспроизводимо. 4 | 5 | Чтобы зафиксировать формат, начнём с самого компактного примера: классифицируем отзыв клиента по тональности — «Положительный», «Отрицательный» или «Нейтральный». Системное сообщение даёт прямую инструкцию, пользовательское — представляет текст отзыва, который требуется оценить. 6 | 7 | ```python 8 | system_message = """Классифицируй отзыв клиента в одну из категорий: Положительный, Отрицательный или Нейтральный.""" 9 | ``` 10 | 11 | Для `user_message` используем отзыв, который нужно классифицировать: 12 | ```python 13 | user_message = """Я недавно купил товар в вашем магазине. Покупка прошла отлично, а качество товара превзошло мои ожидания!""" 14 | ``` 15 | 16 | Такой диалог строится на общей схеме Chat Completions API: каждое сообщение — это структура с ключами `role` и `content`. `role` указывает источник (система или пользователь), а `content` содержит текст. Разделение ролей позволяет инициализировать поведение модели заранее, а затем передавать ей конкретный ввод. В простейшем случае последовательность выглядит так: системное сообщение задаёт правила и стиль, а пользовательское формулирует задачу. Например, если вам нужно получить игривый стих о счастливой морковке, вы вначале можете указать установку модели `{'role': 'system', 'content': "Ты — помощник, отвечающий в стиле игривого поэта."}`, а затем отправить запрос `{'role': 'user', 'content': "Напиши очень короткое стихотворение о счастливой морковке."}`. Один и тот же пользовательский запрос в другом системном контексте дал бы уже иной по тону и форме результат, поэтому последовательность `system → user` — ключ к контролю поведения модели. 
17 | 18 | Полный рабочий пример для задачи классификации отзывов собирается из этих же элементов и отличается лишь тем, что мы вызываем модель программно и возвращаем ответ: 19 | 20 | ```python 21 | import os 22 | from openai import OpenAI 23 | from dotenv import load_dotenv, find_dotenv 24 | _ = load_dotenv(find_dotenv()) # читаем локальный .env 25 | 26 | client = OpenAI() 27 | 28 | def classify(messages, model="gpt-4o-mini", temperature=0, max_tokens=500): 29 | response = client.chat.completions.create( 30 | model=model, 31 | messages=messages, 32 | temperature=temperature, 33 | max_tokens=max_tokens, 34 | ) 35 | return response.choices[0].message["content"] 36 | 37 | delimiter = "####" 38 | system_message = """Классифицируй отзыв клиента в одну из категорий: Положительный, Отрицательный или Нейтральный.""" 39 | 40 | user_message = """Я недавно купил товар в вашем магазине. Покупка прошла отлично, а качество товара превзошло мои ожидания!""" 41 | 42 | messages = [ 43 | {'role': 'system', 'content': system_message}, 44 | {'role': 'user', 'content': f"{delimiter}{user_message}{delimiter}"}, 45 | ] 46 | 47 | response = classify(messages) 48 | print(response) 49 | ``` 50 | 51 | На тех же принципах строятся и другие сценарии классификации. В электронной почте удобно отличать рабочие письма от личных и спама (категории: Работа, Личное, Спам). Формулировка системного сообщения может быть: «Классифицируй следующее письмо как Работа, Личное или Спам.», а пример пользовательского: «Отличные скидки на нашу новую электронику! Жми сейчас и экономь.» В анализе тональности рецензий на фильмы полезно различать «Положительный», «Отрицательный» и «Нейтральный» оттенки: системное сообщение — «Определи тональность следующего отзыва о фильме: Положительный, Отрицательный или Нейтральный.», пользовательское — «Визуально фильм потрясающий, но сюжет предсказуем и неглубокий.» Если речь о новостях, удобно классифицировать тему материала как Политика, Технологии, Спорт или Развлечения: «Определи тему новости: Политика, Технологии, Спорт или Развлечения.» и «Новая модель смартфона использует прорывные технологии, меняющие отрасль.» 52 | 53 | Для оценок продуктов по отзывам подойдут классы по звёздам — 1, 2, 3, 4 или 5: «По содержанию отзыва определи рейтинг от 1 до 5 звезд.» и «Дизайн интересный, но частые поломки и слабая поддержка мешают рекомендовать.» При маршрутизации клиентских обращений часто выделяют намерения: Биллинг, Техподдержка, Продажи или Общий вопрос — «Определи намерение запроса: Биллинг, Техподдержка, Продажи или Общий вопрос.» и «Расскажите про доступные тарифы и текущие акции.» Определяя жанр текста, можно использовать категории Художественный, Нон-фикшн, Поэзия, Новости — «Определи жанр текста: Художественный, Нон-фикшн, Поэзия или Новости.» и «В сердце города, среди шумных улиц, скрывался сад, не тронутый временем.» 54 | 55 | В социальных сетях ценна автоматическая оценка тона публикации — Серьёзный, Ироничный, Вдохновляющий или Раздражённый. Здесь подойдёт системное сообщение «Определи тон следующего поста: Серьезный, Ироничный, Вдохновляющий или Раздраженный.», а пример пользовательского: «Нет ничего лучше, чем начать день с улыбки. 
Счастье заразительно!» В академических текстах полезно классифицировать область знания: Биология, Компьютерные науки, Психология или Математика — «Определи область следующей аннотации: Биология, Компьютерные науки, Психология или Математика.» и «Исследование изучает алгоритмическую сложность методов сортировки и их эффективность.» В гастрономических отзывах возможно выделение вкусового профиля: Сладкий, Солёный, Кислый, Горький, Умами — «Определи вкусовой профиль в отзыве: Сладкий, Соленый, Кислый, Горький или Умами.» и «Блюдо с идеальным балансом умами и легкой сладостью, усиливающей вкус.» Наконец, для экстренных вызовов важно быстро определить тип ситуации: Пожар, Медицинский, Преступление или Другое — «Определи тип экстренной ситуации по расшифровке звонка: Пожар, Медицинский, Преступление или Другое.» и «Соседнее здание в дыму, видим пламя. Просьба срочно помочь!» Во всех этих случаях ключ к качественным ответам — ясная формулировка системного сообщения, где чётко указаны границы и перечень категорий; пользовательское сообщение остаётся лаконичным носителем того текста, который требуется отнести к одному из классов. 56 | 57 | Для каждого из таких сценариев можно свободно менять содержимое `user_message` под конкретный кейс: важно, чтобы системное сообщение оставалось конкретным и однозначным относительно набора допустимых меток. 58 | 59 | ## Теоретические вопросы 60 | 1. Каковы ключевые компоненты сообщения при работе с моделями GPT (ключи `role` и `content`), и почему важно их различать? 61 | 2. Чем роль сообщений `system` отличается от роли сообщений `user` в диалоге с ИИ? 62 | 3. Приведите пример того, как `system`-сообщение может задавать поведение или стиль ответа модели. 63 | 4. Как последовательность сообщений `system` → `user` влияет на ответ модели? 64 | 5. В рассмотренном примере классификации отзывов, какие категории используются? 65 | 6. Опишите сценарий, где полезна классификация тональности рецензии на фильм. Какие категории подойдут? 66 | 7. Как классификация темы новостной статьи помогает в управлении контентом или рекомендациях? Приведите варианты категорий. 67 | 8. Обсудите важность классификации клиентских обращений в бизнесе. Какие категории помогут оптимизировать поддержку? 68 | 9. Какова роль `user_message` в задачах классификации, и как его структурировать для получения точных результатов? 69 | 10. Чем полезна классификация тона постов в соцсетях для модерации или маркетинга? Приведите примеры категорий. 70 | 71 | -------------------------------------------------------------------------------- /docs/CHAPTER-2/Answers 2.6.md: -------------------------------------------------------------------------------- 1 | # Answers 2.6 2 | 3 | ## Theory 4 | 1. The three main stages involved in the question answering process of a RAG system are Query Reception, Document Retrieval, and Answer Generation. 5 | 2. The limitations of passing all retrieved chunks into the LM's context window include constraints on the context window size, leading to potential loss of relevant information. Strategies to overcome this constraint include MapReduce and Refine, which allow for the aggregation or sequential refinement of information from multiple documents. 6 | 3. The significance of using a Vector Database (VectorDB) in document retrieval for RAG systems lies in its ability to efficiently store and retrieve document embeddings, facilitating the quick and accurate retrieval of documents relevant to a user's query. 7 | 4. 
The RetrievalQA chain combines document retrieval with question answering by utilizing language models to generate responses based on the content of retrieved documents, thereby enhancing the relevance and accuracy of answers provided to users. 8 | 5. The MapReduce technique is designed for aggregating information from multiple documents quickly, while the Refine technique allows for the sequential refinement of an answer, making it more suitable for tasks requiring high accuracy and iterative improvement. The choice between them depends on the specific requirements of the task at hand. 9 | 6. Practical considerations when implementing MapReduce or Refine techniques in a distributed system include paying attention to network latency and data serialization costs to ensure efficient data transfer and processing, which can significantly impact overall performance. 10 | 7. Experimenting with both MapReduce and Refine techniques is crucial in a RAG system because their effectiveness can vary based on the nature of the data and the specifics of the question-answering task, and experimentation helps determine which technique yields the best results for a particular application. 11 | 8. A major limitation of RetrievalQA chains is their inability to preserve conversational history, which impacts the flow of follow-up queries by making it challenging to maintain context and coherence in ongoing conversations. 12 | 9. Integrating conversational memory into RAG systems is important because it enables the system to remember previous interactions, enhancing the system's ability to engage in meaningful dialogues with users by providing context-aware responses. 13 | 10. Recommended areas for further reading and exploration include the latest advancements in language model technologies, their implications for RAG systems, and additional strategies for integrating conversational memory into RAG frameworks to advance understanding and implementation of sophisticated AI-driven interactions. 14 | 15 | ## Practice 16 | 1. 17 | ```python 18 | from langchain.vectorstores import Chroma 19 | from langchain_openai import OpenAIEmbeddings 20 | 21 | def initialize_vector_database(directory_path): 22 | # Initialize the embeddings generator using OpenAI's embeddings 23 | embeddings_generator = OpenAIEmbeddings() 24 | 25 | # Initialize the vector database with the specified storage directory and embedding function 26 | vector_database = Chroma(persist_directory=directory_path, embedding_function=embeddings_generator) 27 | 28 | # Display the current document count in the vector database to verify initialization 29 | document_count = vector_database._collection.count() # Assuming the Chroma implementation provides a count method 30 | print(f"Document Count in VectorDB: {document_count}") 31 | 32 | # Example usage: 33 | documents_storage_directory = 'path/to/your/directory' 34 | initialize_vector_database(documents_storage_directory) 35 | ``` 36 | 37 | 2. 
38 | ```python 39 | from langchain.vectorstores import Chroma 40 | from langchain_openai import OpenAIEmbeddings, ChatOpenAI 41 | from langchain.chains import RetrievalQA 42 | from langchain.prompts import PromptTemplate 43 | 44 | def setup_retrieval_qa_chain(model_name, documents_storage_directory): 45 | # Initialize the embeddings generator and vector database 46 | embeddings_generator = OpenAIEmbeddings() 47 | vector_database = Chroma(persist_directory=documents_storage_directory, embedding_function=embeddings_generator) 48 | 49 | # Initialize the language model 50 | language_model = ChatOpenAI(model=model_name, temperature=0) 51 | 52 | # Custom prompt template 53 | custom_prompt_template = """To better assist with the inquiry, consider the details provided below as your reference... 54 | {context} 55 | Inquiry: {question} 56 | Insightful Response:""" 57 | 58 | # Initialize the RetrievalQA chain 59 | question_answering_chain = RetrievalQA.from_chain_type( 60 | language_model, 61 | retriever=vector_database.as_retriever(), 62 | return_source_documents=True, 63 | chain_type_kwargs={"prompt": PromptTemplate.from_template(custom_prompt_template)} 64 | ) 65 | 66 | return question_answering_chain 67 | 68 | # Example usage: 69 | model_name = "gpt-4o-mini" 70 | documents_storage_directory = 'path/to/your/documents' 71 | qa_chain = setup_retrieval_qa_chain(model_name, documents_storage_directory) 72 | ``` 73 | 74 | 3. 75 | ```python 76 | # Assuming the `setup_retrieval_qa_chain` function is defined in the same script or imported 77 | 78 | # Setup for both techniques using the same model and document storage directory 79 | model_name = "gpt-3.5-turbo" 80 | documents_storage_directory = 'path/to/your/documents' 81 | qa_chain = setup_retrieval_qa_chain(model_name, documents_storage_directory) 82 | 83 | # Configure the question answering chains for MapReduce and Refine 84 | question_answering_chain_map_reduce = RetrievalQA.from_chain_type( 85 | qa_chain.language_model, 86 | retriever=qa_chain.retriever, 87 | chain_type="map_reduce" 88 | ) 89 | 90 | question_answering_chain_refine = RetrievalQA.from_chain_type( 91 | qa_chain.language_model, 92 | retriever=qa_chain.retriever, 93 | chain_type="refine" 94 | ) 95 | 96 | # Sample query 97 | query = "What is the importance of probability in machine learning?" 98 | 99 | # Execute the MapReduce technique 100 | response_map_reduce = question_answering_chain_map_reduce({"query": query}) 101 | print("MapReduce Answer:", response_map_reduce["result"]) 102 | 103 | # Execute the Refine technique 104 | response_refine = question_answering_chain_refine({"query": query}) 105 | print("Refine Answer:", response_refine["result"]) 106 | ``` 107 | 108 | 4. 109 | ```python 110 | def handle_conversational_context(initial_query, follow_up_query, qa_chain): 111 | """ 112 | Simulates the handling of a follow-up question in a conversational context. 113 | 114 | Parameters: 115 | - initial_query: The first user query. 116 | - follow_up_query: The follow-up user query. 117 | - qa_chain: An initialized question answering chain. 118 | 119 | Returns: None. Prints the responses to both queries. 
120 | """ 121 | # Generate a response to the initial query 122 | initial_response = qa_chain({"query": initial_query}) 123 | print("Response to Initial Query:", initial_response["result"]) 124 | 125 | # Generate a response to the follow-up query 126 | follow_up_response = qa_chain({"query": follow_up_query}) 127 | print("Response to Follow-Up Query:", follow_up_response["result"]) 128 | 129 | # Example usage (assuming a question_answering_chain like the one set up previously): 130 | initial_query = "What is the significance of probability in statistics?" 131 | follow_up_query = "How does it apply to real-world problems?" 132 | # handle_conversational_context(initial_query, follow_up_query, question_answering_chain) 133 | ``` -------------------------------------------------------------------------------- /docs/ru/CHAPTER-2/Ответы 2.6.md: -------------------------------------------------------------------------------- 1 | # Ответы 2.6 2 | 3 | ## Теория 4 | 1. **Три стадии RAG-QA**: Прием запроса, извлечение релевантных документов и генерация ответа. 5 | 2. **Ограничение окна контекста**: Из-за ограничения размера контекстного окна LLM невозможно передать все фрагменты документов. Стратегии MapReduce и Refine помогают агрегировать или уточнять информацию из нескольких документов. 6 | 3. **Векторная база данных**: Хранит эмбеддинги документов и обеспечивает быстрый поиск наиболее релевантных документов на основе их смыслового сходства. 7 | 4. **Цепочка RetrievalQA**: Объединяет этапы поиска (Retrieval) и генерации ответа (QA), что повышает уместность и точность конечного результата. 8 | 5. **MapReduce и Refine**: MapReduce позволяет быстро получить сводный ответ из большого количества документов, тогда как Refine обеспечивает последовательное уточнение информации, что критично для задач, где важна высокая точность. Выбор между ними зависит от конкретной задачи. 9 | 6. **Распределенные системы**: При работе в распределенных системах необходимо учитывать сетевые задержки и механизмы сериализации данных. 10 | 7. **Экспериментирование**: Рекомендуется экспериментировать с техниками MapReduce и Refine, поскольку их эффективность сильно зависит от типа данных и характера задаваемых вопросов. 11 | 8. **Ограничение RetrievalQA**: Основным ограничением RetrievalQA является отсутствие встроенной истории диалога, что затрудняет поддержание контекста в последующих запросах. 12 | 9. **Память диалога**: Необходима для учета предыдущих сообщений и обеспечения контекстных ответов в ходе продолжительной беседы. 13 | 10. **Направления для дальнейшего изучения**: Новые подходы LLM, их влияние на RAG-системы и развитие стратегий управления памятью в RAG-цепочках. 14 | 15 | ## Практические задания 16 | 1. 17 | ```python 18 | from langchain.vectorstores import Chroma 19 | from langchain_openai import OpenAIEmbeddings 20 | 21 | def initialize_vector_database(directory_path): 22 | # Инициализируем генератор эмбеддингов, используя модель OpenAI для создания векторных представлений текста. 23 | embeddings_generator = OpenAIEmbeddings() 24 | 25 | # Инициализируем векторную базу данных Chroma, указывая директорию для сохранения данных 26 | # и функцию для генерации эмбеддингов. 27 | vector_database = Chroma(persist_directory=directory_path, embedding_function=embeddings_generator) 28 | 29 | # Отображаем текущее количество документов в векторной базе данных для проверки корректности инициализации. 30 | # Предполагается, что объект `_collection` в Chroma предоставляет метод `count()`. 
31 | document_count = vector_database._collection.count() 32 | print(f"Количество документов в VectorDB: {document_count}") 33 | 34 | # Пример использования функции initialize_vector_database: 35 | documents_storage_directory = 'path/to/your/directory' 36 | initialize_vector_database(documents_storage_directory) 37 | ``` 38 | 39 | 2. 40 | ```python 41 | from langchain.vectorstores import Chroma 42 | from langchain_openai import OpenAIEmbeddings, ChatOpenAI 43 | from langchain.chains import RetrievalQA 44 | from langchain.prompts import PromptTemplate 45 | 46 | def setup_retrieval_qa_chain(model_name, documents_storage_directory): 47 | # Инициализируем генератор эмбеддингов и векторную базу данных Chroma. 48 | embeddings_generator = OpenAIEmbeddings() 49 | vector_database = Chroma(persist_directory=documents_storage_directory, embedding_function=embeddings_generator) 50 | 51 | # Инициализируем языковую модель (LLM) для использования в цепочке RetrievalQA. 52 | language_model = ChatOpenAI(model=model_name, temperature=0) 53 | 54 | # Определяем пользовательский шаблон промпта для форматирования запросов к LLM. 55 | custom_prompt_template = """To better assist with the inquiry, consider the details provided below as your reference... 56 | {context} 57 | Inquiry: {question} 58 | Insightful Response:""" 59 | 60 | # Инициализируем цепочку RetrievalQA, передавая языковую модель, 61 | # ретривер из векторной базы данных, и настраивая возвращение исходных документов, 62 | # а также используя пользовательский шаблон промпта. 63 | question_answering_chain = RetrievalQA.from_chain_type( 64 | language_model, 65 | retriever=vector_database.as_retriever(), 66 | return_source_documents=True, 67 | chain_type_kwargs={"prompt": PromptTemplate.from_template(custom_prompt_template)} 68 | ) 69 | 70 | return question_answering_chain 71 | 72 | # Пример использования функции setup_retrieval_qa_chain: 73 | model_name = "gpt-4o-mini" 74 | documents_storage_directory = 'path/to/your/documents' 75 | qa_chain = setup_retrieval_qa_chain(model_name, documents_storage_directory) 76 | ``` 77 | 78 | 3. 79 | ```python 80 | # Предполагаем, что функция `setup_retrieval_qa_chain` определена в том же скрипте или импортирована. 81 | 82 | # Настройка для демонстрации обеих техник (MapReduce и Refine), 83 | # используя ту же языковую модель и директорию хранения документов. 84 | model_name = "gpt-3.5-turbo" 85 | documents_storage_directory = 'path/to/your/documents' 86 | qa_chain = setup_retrieval_qa_chain(model_name, documents_storage_directory) 87 | 88 | # Настраиваем цепочки ответов на вопросы: одну для MapReduce, другую для Refine. 89 | question_answering_chain_map_reduce = RetrievalQA.from_chain_type( 90 | qa_chain.language_model, 91 | retriever=qa_chain.retriever, 92 | chain_type="map_reduce" # Указываем тип цепочки как MapReduce 93 | ) 94 | 95 | question_answering_chain_refine = RetrievalQA.from_chain_type( 96 | qa_chain.language_model, 97 | retriever=qa_chain.retriever, 98 | chain_type="refine" # Указываем тип цепочки как Refine 99 | ) 100 | 101 | # Определяем пример запроса для тестирования обеих техник. 102 | query = "What is the importance of probability in machine learning?" 103 | 104 | # Выполняем технику MapReduce и выводим полученный ответ. 105 | response_map_reduce = question_answering_chain_map_reduce({"query": query}) 106 | print("Ответ MapReduce:", response_map_reduce["result"]) 107 | 108 | # Выполняем технику Refine и выводим полученный ответ. 
109 | response_refine = question_answering_chain_refine({"query": query}) 110 | print("Ответ Refine:", response_refine["result"]) 111 | ``` 112 | 113 | 4. 114 | ```python 115 | def handle_conversational_context(initial_query, follow_up_query, qa_chain): 116 | """ 117 | Симулирует обработку уточняющего вопроса в контексте продолжительной беседы. 118 | 119 | Параметры: 120 | - initial_query (str): Первый пользовательский запрос. 121 | - follow_up_query (str): Уточняющий пользовательский запрос, относящийся к предыдущему контексту. 122 | - qa_chain (RetrievalQA): Инициализированная цепочка ответов на вопросы, способная обрабатывать запросы. 123 | 124 | Возвращает: 125 | - None: Функция выводит ответы на оба запроса непосредственно в консоль. 126 | """ 127 | # Генерируем ответ на первоначальный запрос пользователя. 128 | initial_response = qa_chain({"query": initial_query}) 129 | print("Ответ на первоначальный запрос:", initial_response["result"]) 130 | 131 | # Генерируем ответ на уточняющий запрос, который должен использовать контекст предыдущего диалога. 132 | follow_up_response = qa_chain({"query": follow_up_query}) 133 | print("Ответ на уточняющий запрос:", follow_up_response["result"]) 134 | 135 | # Пример использования функции handle_conversational_context: 136 | # Предполагается, что `question_answering_chain` уже настроена, как показано ранее. 137 | initial_query = "What is the significance of probability in statistics?" 138 | follow_up_query = "How does it apply to real-world problems?" 139 | # handle_conversational_context(initial_query, follow_up_query, question_answering_chain) 140 | ``` 141 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-1/1.6 Building and Evaluating LLM Applications.md: -------------------------------------------------------------------------------- 1 | # 1.6 Building and Evaluating LLM Applications 2 | 3 | Building applications powered by large language models (LLMs) requires more than clean integration — it needs a systematic quality evaluation that covers both objective and subjective aspects. In practice, you combine accuracy, recall, and F1 (when gold answers are available) with user ratings and satisfaction metrics (CSA), while also tracking operational indicators like cost and latency. This blend exposes weak spots, informs release decisions, and guides targeted improvements. 4 | 5 | The typical path to production starts with simple prompts and a small dataset for quick iteration; then you broaden coverage, complicate scenarios, refine metrics and quality criteria — remembering that perfection isn’t always necessary. It’s often enough to consistently solve the target tasks within quality and budget constraints. In high‑stakes scenarios (medicine, law enforcement, finance), stricter validation becomes essential: random sampling and hold‑out tests, bias and error checks, and attention to ethical and legal issues — preventing harm, ensuring explainability, and enabling audit. 6 | 7 | Good engineering style emphasizes modularity and fast iteration, automated regression tests and measurements, thoughtful metric selection aligned with business goals, and mandatory bias/fairness analysis with regular reviews. 8 | 9 | To make evaluation reproducible, use rubrics and evaluation protocols: define criteria in advance — relevance to user intent and context, factual correctness, completeness, and coherence/fluency — as well as the process, scales, and thresholds. 
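In code, such a protocol can live as a small declarative spec that every evaluation script imports; a minimal sketch (criterion names, weights, scale, and the release threshold here are illustrative, not prescribed):

```python
# Hypothetical rubric spec: a shared scale, a release threshold, and weighted criteria.
EVALUATION_RUBRIC = {
    "scale": (0, 10),                  # every criterion is scored on the same 0-10 scale
    "release_threshold": 7.5,          # minimum weighted score required to ship a change
    "criteria": {
        "relevance":        {"weight": 2, "description": "Addresses the user's intent and context"},
        "factual_accuracy": {"weight": 3, "description": "Claims are correct and verifiable"},
        "completeness":     {"weight": 3, "description": "Covers every part of the request"},
        "coherence":        {"weight": 2, "description": "Fluent, well-structured answer"},
    },
}

def weighted_score(per_criterion_scores: dict) -> float:
    """Fold per-criterion scores (0-10) into one number according to the rubric weights."""
    criteria = EVALUATION_RUBRIC["criteria"]
    total_weight = sum(c["weight"] for c in criteria.values())
    return sum(per_criterion_scores[name] * c["weight"] for name, c in criteria.items()) / total_weight
```

With the spec versioned alongside the code, every run scores answers against the same criteria, and a release decision reduces to comparing `weighted_score` with the threshold.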
For subjective tasks, use multiple independent raters and automatic consistency checks. Where possible, compare answers to ideal (expert) responses — a “gold standard” provides an anchor for more objective judgments. Here’s a small environment scaffold and call function for reproducible experiments and evaluations: 10 | 11 | ```python 12 | import os 13 | from openai import OpenAI 14 | from dotenv import load_dotenv 15 | 16 | load_dotenv() 17 | client = OpenAI() 18 | 19 | def fetch_llm_response(prompts, model="gpt-4o-mini", temperature=0, max_tokens=500): 20 |     response = client.chat.completions.create( 21 |         model=model, 22 |         messages=prompts, 23 |         temperature=temperature, 24 |         max_tokens=max_tokens, 25 |     ) 26 |     return response.choices[0].message.content 27 | ``` 28 | 29 | Next, formalize rubric‑based evaluation and assign weights to compute an overall score with detailed feedback. Below is a template where the model produces an assessment according to given criteria; the parsing is a stub and should be replaced with logic suited to your model’s output format: 30 | 31 | ```python 32 | def evaluate_response_against_detailed_rubric(test_data, llm_response): 33 |     """ 34 |     Evaluate the answer on accuracy, relevance, completeness, and coherence. 35 |     Return an overall score and detailed feedback. 36 |     """ 37 |     rubric_criteria = { 38 |         'accuracy': {'weight': 3, 'score': None, 'feedback': ''}, 39 |         'relevance': {'weight': 2, 'score': None, 'feedback': ''}, 40 |         'completeness': {'weight': 3, 'score': None, 'feedback': ''}, 41 |         'coherence': {'weight': 2, 'score': None, 'feedback': ''} 42 |     } 43 |     total_weight = sum(c['weight'] for c in rubric_criteria.values()) 44 | 45 |     system_prompt = "Assess the support agent’s answer given the provided context." 46 |     evaluation_prompt = f"""\ 47 | [Question]: {test_data['customer_query']} 48 | [Context]: {test_data['context']} 49 | [Expected answers]: {test_data.get('expected_answers', 'N/A')} 50 | [LLM answer]: {llm_response} 51 | 52 | Evaluate the answer on accuracy, relevance, completeness, and coherence. 53 | Provide scores (0–10) for each criterion and specific feedback. 54 | """ 55 | 56 |     evaluation_results = fetch_llm_response([ 57 |         {"role": "system", "content": system_prompt}, 58 |         {"role": "user", "content": evaluation_prompt}, 59 |     ]) 60 | 61 |     # Parsing stub — replace with real parsing of your model’s output 62 |     for k in rubric_criteria: 63 |         rubric_criteria[k]['score'] = 8 64 |         rubric_criteria[k]['feedback'] = "Good performance on this criterion." 65 | 66 |     overall = sum(v['score'] * v['weight'] for v in rubric_criteria.values()) / total_weight 67 |     detailed = {k: {"score": v['score'], "feedback": v['feedback']} for k, v in rubric_criteria.items()} 68 |     return {"overall_score": overall, "detailed_scores": detailed} 69 | ``` 70 | 71 | When you need a gold‑standard comparison, explicitly compare the model’s answer with the ideal expert answer and score high‑priority criteria (factual accuracy, alignment, completeness, coherence).
Here’s a skeleton that returns both an aggregate score and the raw comparison text for audit: 72 | 73 | ```python 74 | def detailed_evaluation_against_ideal_answer(test_data, llm_response): 75 | criteria = { 76 | 'factual_accuracy': {'weight': 4, 'score': None, 'feedback': ''}, 77 | 'alignment_with_ideal': {'weight': 3, 'score': None, 'feedback': ''}, 78 | 'completeness': {'weight': 3, 'score': None, 'feedback': ''}, 79 | 'coherence': {'weight': 2, 'score': None, 'feedback': ''} 80 | } 81 | total = sum(c['weight'] for c in criteria.values()) 82 | 83 | system_prompt = "Compare the LLM answer to the ideal answer, focusing on factual content and alignment." 84 | comparison_prompt = f"""\ 85 | [Question]: {test_data['customer_query']} 86 | [Ideal answer]: {test_data['ideal_answer']} 87 | [LLM answer]: {llm_response} 88 | """ 89 | 90 | evaluation_text = fetch_llm_response([ 91 | {"role": "system", "content": system_prompt}, 92 | {"role": "user", "content": comparison_prompt}, 93 | ]) 94 | 95 | # Parsing stub 96 | for k in criteria: 97 | criteria[k]['score'] = 8 98 | criteria[k]['feedback'] = "Good alignment with the gold answer." 99 | 100 | score = sum(v['score'] * v['weight'] for v in criteria.values()) / total 101 | return {"overall_score": score, "details": criteria, "raw": evaluation_text} 102 | ``` 103 | 104 | On top of these basics, add advanced techniques: evaluate semantic similarity via embeddings and similarity metrics (not just surface overlap), bring in independent reviewers for crowd evaluation, include automated checks for coherence and logic, and build adaptive evaluation frameworks tailored to your domain and task types. In production, continuous evaluation is crucial: track version and metric history; close the loop from user feedback back to development; include diverse cases, edge cases, and cultural/linguistic variation; involve experts (including blind reviews to reduce bias); compare with alternative models; and employ specialized “judges” to detect contradictions and factual errors. Together, rigorous methods and constant iteration — plus rubrics, gold standards, expert reviews, and automated checks — help you build reliable and ethical systems. 105 | 106 | ## Theory Questions 107 | 1. Why evaluate LLM answers, and along which dimensions? 108 | 2. Give examples of metrics and explain their role in development. 109 | 3. What does the iterative path from development to production look like? 110 | 4. Why do high‑stakes scenarios require stricter rigor? Give examples. 111 | 5. List best practices for bootstrapping, iteration, and automated testing. 112 | 6. How do automated tests help development? 113 | 7. Why should metrics be tuned to the specific task? 114 | 8. How do you build a rubric and evaluation protocols? 115 | 9. Which advanced evaluation techniques apply and why? 116 | 10. How do continuous evaluation and broad test coverage improve reliability? 117 | 118 | ## Practical Tasks 119 | 1. Write a function that reads the API key from the environment, queries the LLM, and measures runtime and tokens used. 120 | 121 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-2/2.4 The Power of Embeddings.md: -------------------------------------------------------------------------------- 1 | # 2.4 The Power of Embeddings 2 | 3 | Embeddings are numeric representations of text: words, sentences, and documents are mapped to vectors in a high‑dimensional space, and semantically similar texts end up close together geometrically. 
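“Close” here is usually measured with cosine similarity, the cosine of the angle between two vectors; a toy sketch with made-up 3-D vectors (real embeddings have hundreds or thousands of dimensions):

```python
import numpy as np

def cosine_similarity(a, b):
    """Cosine of the angle between two vectors: near 1.0 for the same direction, near 0 for unrelated."""
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

dog     = [0.9, 0.1, 0.0]   # made-up vectors, for illustration only
canine  = [0.8, 0.2, 0.1]
weather = [0.0, 0.1, 0.9]

print(cosine_similarity(dog, canine))   # high: semantically related
print(cosine_similarity(dog, weather))  # low: unrelated
```

The same comparison drives the searches later in this chapter, with vectors produced by a real embedding model rather than written by hand.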
These representations are learned from large corpora: the model associates a word with its context and captures semantic relations, so synonyms and terms that appear in similar contexts lie nearby. As a result, semantic search goes beyond exact “keyword” matching: compute an embedding for each document (or chunk) and for the user query, compare vector proximity via cosine or another metric, and rank materials by semantic similarity — even without exact matches. This shifts how we analyze, store, and search: interactions become more meaningful and recommendations more precise. 4 | 5 | On top of embeddings sit vector stores — databases optimized for vector storage and fast nearest‑neighbor search. They use specialized indexes and algorithms to answer similarity queries over large datasets and fit both research and production. Choose based on data size (from in‑memory options for small sets to distributed systems at scale), persistence (do you need durable disk storage or a transient store for prototypes), and use case (lab vs. production). For quick prototyping, Chroma is a common choice — a lightweight in‑memory store; for larger and long‑lived systems, use distributed/cloud vector DBs. In a typical semantic‑search pipeline, documents are first split into meaningful chunks, then embeddings are computed and indexed; on a query, its embedding is computed, nearest chunks are retrieved, and the extracted parts plus the query are fed to an LLM to generate a coherent answer. 6 | 7 | Before diving into embeddings and vector DBs, prepare the environment: imports, API keys, and basic config. 8 | 9 | ```python 10 | import os 11 | from openai import OpenAI 12 | import sys 13 | from dotenv import load_dotenv, find_dotenv 14 | 15 | sys.path.append('../..') 16 | 17 | load_dotenv(find_dotenv()) 18 | 19 | client = OpenAI() 20 | ``` 21 | 22 | Next, load documents and split them into semantically meaningful fragments — this makes data easier to manage and prepares it for embedding creation. We’ll use a series of PDFs (with some “noise” like duplicates) for demonstration: 23 | 24 | ```python 25 | from langchain.document_loaders import PyPDFLoader 26 | 27 | pdf_document_loaders = [ 28 |     PyPDFLoader("docs/doc1.pdf"), 29 |     PyPDFLoader("docs/doc2.pdf"), 30 |     PyPDFLoader("docs/doc3.pdf"), 31 | ] 32 | 33 | loaded_documents_content = [] 34 | 35 | for document_loader in pdf_document_loaders: 36 |     loaded_documents_content.extend(document_loader.load()) 37 | ``` 38 | 39 | After loading, split documents into chunks to improve manageability and downstream efficiency: 40 | 41 | ```python 42 | from langchain.text_splitter import RecursiveCharacterTextSplitter 43 | 44 | document_splitter = RecursiveCharacterTextSplitter( 45 |     chunk_size=1500, 46 |     chunk_overlap=150 47 | ) 48 | document_splits = document_splitter.split_documents(loaded_documents_content) 49 | ``` 50 | 51 | Now compute embeddings for each chunk: turn text into vectors that reflect semantic meaning. 52 | 53 | ```python 54 | from langchain_openai import OpenAIEmbeddings 55 | import numpy as np 56 | 57 | embedding_generator = OpenAIEmbeddings() 58 | 59 | sentence_examples = ["I like dogs", "I like canines", "The weather is ugly outside"] 60 | embeddings = [embedding_generator.embed_query(sentence) for sentence in sentence_examples] 61 | 62 | similarity_dog_canine = np.dot(embeddings[0], embeddings[1]) 63 | similarity_dog_weather = np.dot(embeddings[0], embeddings[2]) 64 | ``` 65 | 66 | Index the vectors in a vector store to enable fast similarity search.
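Conceptually, a vector store is little more than a collection of (embedding, payload) pairs plus a nearest-neighbor search; a deliberately naive sketch of that idea (production stores replace the linear scan with approximate-nearest-neighbor indexes):

```python
import numpy as np

class NaiveVectorIndex:
    """Minimal illustration of a vector store: keep vectors, return the closest ones."""

    def __init__(self):
        self.vectors = []
        self.payloads = []

    def add(self, vector, payload):
        self.vectors.append(np.asarray(vector, dtype=float))
        self.payloads.append(payload)

    def search(self, query_vector, k=3):
        query = np.asarray(query_vector, dtype=float)
        # Cosine similarity against every stored vector (a linear scan).
        scores = [
            float(np.dot(query, v) / (np.linalg.norm(query) * np.linalg.norm(v) + 1e-12))
            for v in self.vectors
        ]
        best = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:k]
        return [(self.payloads[i], scores[i]) for i in best]
```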
For demos, Chroma — an in‑memory option — works well: 67 | 68 | ```python 69 | from langchain.vectorstores import Chroma 70 | 71 | persist_directory = 'docs/chroma/' 72 | 73 | !rm -rf ./docs/chroma 74 | 75 | vector_database = Chroma.from_documents( 76 | documents=document_splits, 77 | embedding=embedding_generator, 78 | persist_directory=persist_directory 79 | ) 80 | ``` 81 | 82 | Now perform a similarity search — this is where embeddings + vector DBs shine: quickly selecting the most relevant fragments for a query. 83 | 84 | ```python 85 | query = "Is there an email I can ask for help?" 86 | retrieved_documents = vector_database.similarity_search(query, k=3) 87 | print(retrieved_documents[0].page_content) 88 | ``` 89 | 90 | Finally, consider edge cases and search quality improvements. Even a useful baseline runs into issues: duplicates and irrelevant documents are common problems that degrade results. 91 | 92 | ```python 93 | # Query example illustrating a failure mode 94 | query_matlab = "What did they say about MATLAB?" 95 | 96 | # Detect duplicate fragments in search results 97 | retrieved_documents_matlab = vector_database.similarity_search(query_matlab, k=5) 98 | ``` 99 | 100 | From there, you can apply strategies to mitigate such failures and retrieve fragments that are both relevant and sufficiently diverse. Taken together, embeddings and vector DBs are a powerful pairing for semantic search over large corpora: solid text preparation, thoughtful indexing, and fast nearest‑neighbor querying enable systems that understand complex prompts; analyzing failures and adding techniques further improves robustness and accuracy. For deeper study, see the OpenAI API docs on embedding generation and surveys of vector databases that compare technologies and usage scenarios. 101 | 102 | ## Theory Questions 103 | 104 | 1. What is the primary goal of turning text into embeddings? 105 | 2. How do embeddings help measure semantic similarity of words and sentences? 106 | 3. Describe how word embeddings are created and the role of context. 107 | 4. How do embeddings improve semantic search over keyword‑based approaches? 108 | 5. What roles do document and query embeddings play in semantic search? 109 | 6. What is a vector store, and why is it important for efficient search? 110 | 7. What criteria matter when choosing a vector database? 111 | 8. Why is Chroma convenient for prototypes, and what are its limitations? 112 | 9. Describe a semantic‑search pipeline using embeddings and a vector DB. 113 | 10. How does document splitting improve search granularity and relevance? 114 | 11. Why embed chunks, and how does that help retrieval? 115 | 12. Why index the vector store for similarity search? 116 | 13. How is a query processed, and which similarity metrics are used? 117 | 14. How does answer generation improve UX in semantic‑search apps? 118 | 15. What environment setup steps are needed? 119 | 16. Give an example where loading and splitting text are critical to search quality. 120 | 17. How do embeddings “transform” text, and how can you demonstrate vector similarity? 121 | 18. What should you consider when configuring Chroma? 122 | 19. How does similarity search find relevant fragments? 123 | 20. What failures are typical in semantic search, and how can you address them? 124 | 125 | ## Practical Tasks 126 | 127 | 1. Implement `generate_embeddings` that returns a list of “embeddings” for strings (e.g., simulated by string length). 128 | 2. 
Implement `cosine_similarity` to compute cosine similarity between two vectors. 129 | 3. Create `SimpleVectorStore` with `add_vector` and `find_most_similar` (cosine‑based). 130 | 4. Load text from a file, split into chunks of a given size (e.g., 500 characters), and print them. 131 | 5. Implement `query_processing`: generate a query embedding (placeholder), find the nearest chunk in `SimpleVectorStore`, and print it. 132 | 6. Implement `remove_duplicates`: return a list without duplicate chunks (exact match or by similarity threshold). 133 | 7. Initialize `SimpleVectorStore`, add placeholder embeddings, run a semantic search, and print top‑3 results. 134 | 8. Implement `embed_and_store_documents`: generate placeholder embeddings for chunks, store them in `SimpleVectorStore`, and return it. 135 | 9. Implement `vector_store_persistence`: demonstrate saving/loading `SimpleVectorStore` (serialization/deserialization). 136 | 10. Implement `evaluate_search_accuracy`: for queries and expected chunks, run search and compute match rate. 137 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-2/2.6 RAG — Techniques for QA.md: -------------------------------------------------------------------------------- 1 | # 2.6 RAG Systems — Techniques for QA 2 | 3 | Retrieval‑Augmented Generation (RAG) combines retrieval and generation, changing how we work with large corpora to build accurate QA systems and chatbots. A critical stage is feeding retrieved documents to the model along with the original query to generate an answer. After relevant materials are retrieved, they must be synthesized into a coherent answer that blends the content with the query’s context and leverages the model’s capabilities. The overall flow is simple: the system accepts a question; retrieves relevant fragments from a vector store; then feeds the retrieved content together with the question into an LLM to form an answer. By default, you can send all retrieved parts into context, but context‑window limits often lead to strategies like MapReduce, Refine, or Map‑Rerank — they aggregate or iteratively refine answers across many documents. 4 | 5 | Before using an LLM for QA, ensure the environment is set up: imports, API keys, model versions, and so on. 6 | 7 | ```python 8 | import os 9 | from openai import OpenAI 10 | from dotenv import load_dotenv 11 | import datetime 12 | 13 | # Load environment variables and configure the OpenAI API key 14 | load_dotenv() 15 | client = OpenAI() 16 | 17 | # Configure LLM versioning 18 | current_date = datetime.datetime.now().date() 19 | llm_name = "gpt-3.5-turbo" 20 | print(f"Using LLM version: {llm_name}") 21 | ``` 22 | 23 | Next, retrieve documents relevant to the query from a vector database (VectorDB), where embeddings are stored. 
24 | 25 | ```python 26 | # Import the vector store and embedding generator 27 | from langchain.vectorstores import Chroma 28 | from langchain_openai import OpenAIEmbeddings 29 | 30 | # Directory where the vector database persists its data 31 | documents_storage_directory = 'docs/chroma/' 32 | 33 | # Initialize the embedding generator using OpenAI embeddings 34 | embeddings_generator = OpenAIEmbeddings() 35 | 36 | # Initialize the vector database with the persistence directory and embedding function 37 | vector_database = Chroma(persist_directory=documents_storage_directory, embedding_function=embeddings_generator) 38 | 39 | # Show the current number of documents in the vector database 40 | print(f"Documents in VectorDB: {vector_database._collection.count()}") 41 | ``` 42 | 43 | `RetrievalQA` combines retrieval and generation: the LLM answers based on retrieved documents. First, initialize the language model, 44 | 45 | ```python 46 | from langchain_openai import ChatOpenAI 47 | 48 | # Initialize the chat model with the selected LLM 49 | language_model = ChatOpenAI(model=llm_name, temperature=0) 50 | ``` 51 | 52 | then configure the RetrievalQA chain with a custom prompt, 53 | 54 | ```python 55 | # Import required LangChain modules 56 | from langchain.chains import RetrievalQA 57 | from langchain.prompts import PromptTemplate 58 | 59 | # Create a custom prompt template to guide the LLM to use the provided context effectively 60 | custom_prompt_template = """To better assist with the inquiry, consider the details provided below as your reference... 61 | {context} 62 | Inquiry: {question} 63 | Insightful Response:""" 64 | 65 | # Initialize the RetrievalQA chain with the custom prompt 66 | a_question_answering_chain = RetrievalQA.from_chain_type( 67 | language_model, 68 | retriever=vector_database.as_retriever(), 69 | return_source_documents=True, 70 | chain_type_kwargs={"prompt": PromptTemplate.from_template(custom_prompt_template)} 71 | ) 72 | ``` 73 | 74 | and check the answer on a simple query. 75 | 76 | ```python 77 | # Provide a sample query 78 | query = "Is probability a class topic?" 79 | response = a_question_answering_chain({"query": query}) 80 | print("Answer:", response["result"]) 81 | ``` 82 | 83 | Next come advanced QA chain types. MapReduce and Refine help work around context‑window limits when handling many documents: MapReduce aggregates in parallel, while Refine improves the answer sequentially. 
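Conceptually, MapReduce asks the model about each retrieved chunk on its own and then merges the partial answers in a final call, while Refine threads a single draft through the chunks and updates it at every step. A schematic sketch with a stand-in `ask_llm` helper (illustrative pseudologic, not the LangChain internals):

```python
def ask_llm(prompt: str) -> str:
    """Stand-in for a single LLM call; wire this to a real client in practice."""
    raise NotImplementedError

def map_reduce_answer(question: str, chunks: list[str]) -> str:
    # Map: answer the question from each chunk independently (easy to parallelize).
    partial_answers = [ask_llm(f"Context:\n{chunk}\n\nQuestion: {question}") for chunk in chunks]
    # Reduce: merge the partial answers into a single final response.
    merged = "\n".join(partial_answers)
    return ask_llm(f"Combine these partial answers into one answer to '{question}':\n{merged}")

def refine_answer(question: str, chunks: list[str]) -> str:
    # Start from the first chunk, then let each additional chunk improve the running draft.
    draft = ask_llm(f"Context:\n{chunks[0]}\n\nQuestion: {question}")
    for chunk in chunks[1:]:
        draft = ask_llm(
            f"Current draft answer:\n{draft}\n\n"
            f"Improve it using this additional context:\n{chunk}\n\nQuestion: {question}"
        )
    return draft
```

The LangChain chains configured below select these strategies through the `chain_type` argument.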
84 | 85 | ```python 86 | # Configure a QA chain to use MapReduce, aggregating answers from multiple documents 87 | question_answering_chain_map_reduce = RetrievalQA.from_chain_type( 88 | language_model, 89 | retriever=vector_database.as_retriever(), 90 | chain_type="map_reduce" 91 | ) 92 | 93 | # Run MapReduce with the user query 94 | response_map_reduce = question_answering_chain_map_reduce({"query": query}) 95 | 96 | # Show the aggregated answer 97 | print("MapReduce answer:", response_map_reduce["result"]) 98 | 99 | # Configure a QA chain to use Refine, which iteratively improves the answer 100 | question_answering_chain_refine = RetrievalQA.from_chain_type( 101 | language_model, 102 | retriever=vector_database.as_retriever(), 103 | chain_type="refine" 104 | ) 105 | 106 | # Run Refine with the same user query 107 | response_refine = question_answering_chain_refine({"query": query}) 108 | 109 | # Show the refined answer 110 | print("Refine answer:", response_refine["result"]) 111 | ``` 112 | 113 | In practice, consider: choose between MapReduce and Refine based on the task (the former for fast aggregation from many sources; the latter for higher accuracy and iterative improvement); in distributed systems, performance depends on network latency and serialization; effectiveness varies with data, so experiment. 114 | 115 | One notable limitation of RetrievalQA is the lack of dialogue history, which degrades handling of follow‑up questions. Demonstration of the limitation: 116 | 117 | ```python 118 | # Import a QA chain from a hypothetical library 119 | from some_library import question_answering_chain as qa_chain 120 | 121 | # Define an initial question related to course content 122 | initial_question_about_course_content = "Does the curriculum cover probability theory?" 123 | # Generate an answer to the initial question 124 | response_to_initial_question = qa_chain({"query": initial_question_about_course_content}) 125 | 126 | # Define a follow‑up question without explicitly preserving conversation context 127 | follow_up_question_about_prerequisites = "Why are those prerequisites important?" 128 | # Generate an answer to the follow‑up question 129 | response_to_follow_up_question = qa_chain({"query": follow_up_question_about_prerequisites}) 130 | 131 | # Display both answers — initial and follow‑up 132 | print("Answer to the initial question:", response_to_initial_question["result"]) 133 | print("Answer to the follow‑up question:", response_to_follow_up_question["result"]) 134 | ``` 135 | 136 | This underscores the importance of integrating conversation memory into RAG systems. 137 | 138 | ## Conclusion 139 | 140 | Advanced QA techniques in RAG deliver more dynamic and accurate answers. A careful `RetrievalQA` implementation and handling of its limitations enable building systems capable of substantive dialogue with users. 141 | 142 | ## Further Reading 143 | 144 | - Explore the latest advances in LLMs and their impact on RAG. 145 | - Investigate strategies for integrating conversation memory into RAG frameworks. 146 | 147 | This chapter provides a foundation for understanding and practicing advanced QA techniques in RAG and for further innovation in AI interactions. 148 | 149 | ## Theory Questions 150 | 151 | 1. Name the three stages of QA in RAG. 152 | 2. What are context‑window limits, and how do MapReduce/Refine help work around them? 153 | 3. Why is a vector database (VectorDB) needed for retrieval in RAG? 154 | 4. How does `RetrievalQA` combine retrieval and generation? 155 | 5. 
Compare the MapReduce and Refine approaches. 156 | 6. Which practical factors matter in distributed systems (network latency, serialization)? 157 | 7. Why is it important to experiment with both approaches? 158 | 8. How does missing dialogue history affect handling of follow‑up questions? 159 | 9. Why integrate conversation memory into RAG? 160 | 10. What should be studied next to deepen RAG expertise? 161 | 162 | ## Practical Tasks 163 | 164 | 1. Initialize a vector DB (Chroma + OpenAIEmbeddings) and print the number of documents it contains. 165 | 2. Configure `RetrievalQA` with a custom prompt, specifying the model and the data storage directory. 166 | 3. Demonstrate `MapReduce` and `Refine` on a single query and print the resulting answers. 167 | 4. Simulate a follow‑up question without preserving dialogue context to show the `RetrievalQA` limitation. 168 | 169 | -------------------------------------------------------------------------------- /docs/ru/CHAPTER-1/1.6 Построение и оценка LLM-приложений.md: -------------------------------------------------------------------------------- 1 | # 1.6 Построение и оценка LLM-приложений 2 | 3 | Разработка приложений на основе больших языковых моделей (LLM) — это не только грамотная интеграция, но и системная оценка качества, которая охватывает как объективные, так и субъективные аспекты. На практике приходится сочетать метрики точности, полноты и F1‑меры (когда доступны эталонные ответы) с пользовательскими рейтингами и метриками удовлетворённости (CSA), а также учитывать эксплуатационные показатели, такие как стоимость и задержка ответа. Такой набор измерений помогает диагностировать слабые места, принимать обоснованные решения о релизах и целенаправленно улучшать продукт. Типичный путь из разработки в продакшен начинается с простых промптов и небольшого набора данных для быстрых итераций; затем расширяется покрытие, усложняются сценарии, уточняются метрики и критерии качества — при этом не всегда нужна «идеальность», достаточно устойчиво решать целевые задачи в заданных ограничениях по качеству и бюджету. В сценариях высокого риска, где потенциальный вред особенно чувствителен (медицина, правоприменение, финансы), возрастает роль строгой валидации: рандомные выборки и hold‑out‑тесты, проверки на предвзятость и ошибки, а также внимание к этическим и юридическим аспектам — недопущение вреда, объяснимость решений и возможность аудита. Хороший инженерный стиль здесь подчёркивает модульность и быстрые итерации, автоматизацию регрессионных тестов и измерений, осмысленный выбор метрик под бизнес‑цели и обязательный анализ предвзятости и справедливости с регулярным ревью. 4 | 5 | Чтобы наладить воспроизводимую оценку, удобно использовать рубрикаторы (rubric) и протоколы оценки: заранее описать критерии качества — релевантность намерению и контексту пользователя, фактическую корректность, полноту охвата и связность/беглость — а также процедуру, шкалы и пороги. В субъективных задачах уместны несколько независимых оценщиков и автоматические проверки согласованности. Там, где это возможно, полезно сравнивать ответы с идеальными (экспертными) — такой «эталон» служит ориентиром и позволяет объективнее судить о качестве. 
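В коде такой протокол оценки удобно держать в виде небольшой декларативной спецификации, которую импортируют все скрипты оценки. Минимальный набросок (названия критериев, веса, шкала и порог выпуска здесь условны):

```python
# Условная спецификация рубрикатора: общая шкала, порог выпуска и взвешенные критерии.
EVALUATION_RUBRIC = {
    "scale": (0, 10),                  # все критерии оцениваются по единой шкале от 0 до 10
    "release_threshold": 7.5,          # минимальный взвешенный балл, при котором изменение можно выпускать
    "criteria": {
        "relevance":        {"weight": 2, "description": "Соответствие намерению и контексту пользователя"},
        "factual_accuracy": {"weight": 3, "description": "Утверждения корректны и проверяемы"},
        "completeness":     {"weight": 3, "description": "Покрыты все части запроса"},
        "coherence":        {"weight": 2, "description": "Связный и хорошо структурированный ответ"},
    },
}

def weighted_score(per_criterion_scores: dict) -> float:
    """Сводит баллы по критериям (0–10) в один взвешенный балл согласно весам рубрикатора."""
    criteria = EVALUATION_RUBRIC["criteria"]
    total_weight = sum(c["weight"] for c in criteria.values())
    return sum(per_criterion_scores[name] * c["weight"] for name, c in criteria.items()) / total_weight
```

Такая спецификация версионируется вместе с кодом: каждый прогон оценивается по одним и тем же критериям, а решение о выпуске сводится к сравнению результата `weighted_score` с порогом.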
В помощь разработчику — простой каркас окружения и функций вызова модели для воспроизводимых экспериментов и оценок: 6 | 7 | ```python 8 | import os 9 | from openai import OpenAI 10 | from dotenv import load_dotenv 11 | 12 | load_dotenv() 13 | client = OpenAI() 14 | 15 | def fetch_llm_response(prompts, model="gpt-4o-mini", temperature=0, max_tokens=500): 16 | response = client.chat.completions.create( 17 | model=model, 18 | messages=prompts, 19 | temperature=temperature, 20 | max_tokens=max_tokens, 21 | ) 22 | return response.choices[0].message["content"] 23 | ``` 24 | 25 | Далее можно формализовать оценку по рубрике и раздать весовые коэффициенты, чтобы получать интегральный балл и развёрнутую обратную связь. Ниже представлен шаблон, в котором модель формирует оценку по заданным критериям; разбор результата показан заглушкой и в рабочем проекте должен быть заменён на парсинг фактического формата ответа: 26 | 27 | ```python 28 | def evaluate_response_against_detailed_rubric(test_data, llm_response): 29 | """ 30 | Оценивает ответ по критериям: точность, релевантность, полнота, связность. 31 | Возвращает интегральный балл и развёрнутую обратную связь. 32 | """ 33 | rubric_criteria = { 34 | 'accuracy': {'weight': 3, 'score': None, 'feedback': ''}, 35 | 'relevance': {'weight': 2, 'score': None, 'feedback': ''}, 36 | 'completeness': {'weight': 3, 'score': None, 'feedback': ''}, 37 | 'coherence': {'weight': 2, 'score': None, 'feedback': ''} 38 | } 39 | total_weight = sum(c['weight'] for c in rubric_criteria.values()) 40 | 41 | system_prompt = "Оцените ответ агента службы поддержки с учётом предоставленного контекста." 42 | evaluation_prompt = f"""\ 43 | [Вопрос]: {test_data['customer_query']} 44 | [Контекст]: {test_data['context']} 45 | [Ожидаемые ответы]: {test_data.get('expected_answers', 'Н/Д')} 46 | [Ответ LLM]: {llm_response} 47 | 48 | Оцените ответ на основе точности, релевантности, полноты и связности. 49 | Предоставьте баллы (от 0 до 10) для каждого критерия и конкретную обратную связь. 50 | """ 51 | 52 | evaluation_results = fetch_llm_response([ 53 | {"role": "system", "content": system_prompt}, 54 | {"role": "user", "content": evaluation_prompt}, 55 | ]) 56 | 57 | # Заглушка парсинга — замените на реальный разбор структуры ответа вашей модели 58 | for k in rubric_criteria: 59 | rubric_criteria[k]['score'] = 8 60 | rubric_criteria[k]['feedback'] = "Хорошо по данному критерию." 61 | 62 | overall = sum(v['score'] * v['weight'] for v in rubric_criteria.values()) / total_weight 63 | detailed = {k: {"score": v['score'], "feedback": v['feedback']} for k, v in rubric_criteria.items()} 64 | return {"overall_score": overall, "detailed_scores": detailed} 65 | ``` 66 | 67 | Когда необходима сверка с эталоном, удобно явно сопоставить ответ модели с идеальным экспертным ответом и на его основе выставить баллы по приоритетным критериям (фактическая точность, соответствие, полнота и связность). 
Ниже — каркас такой процедуры с возвратом как агрегированной оценки, так и сырого текста сравнения для аудита: 68 | 69 | ```python 70 | def detailed_evaluation_against_ideal_answer(test_data, llm_response): 71 | criteria = { 72 | 'factual_accuracy': {'weight': 4, 'score': None, 'feedback': ''}, 73 | 'alignment_with_ideal': {'weight': 3, 'score': None, 'feedback': ''}, 74 | 'completeness': {'weight': 3, 'score': None, 'feedback': ''}, 75 | 'coherence': {'weight': 2, 'score': None, 'feedback': ''} 76 | } 77 | total = sum(c['weight'] for c in criteria.values()) 78 | 79 | system_prompt = "Сравните ответ LLM с идеальным ответом, сосредоточившись на фактическом содержании и соответствии." 80 | comparison_prompt = f"""\ 81 | [Вопрос]: {test_data['customer_query']} 82 | [Идеальный ответ]: {test_data['ideal_answer']} 83 | [Ответ LLM]: {llm_response} 84 | """ 85 | 86 | evaluation_text = fetch_llm_response([ 87 | {"role": "system", "content": system_prompt}, 88 | {"role": "user", "content": comparison_prompt}, 89 | ]) 90 | 91 | # Заглушка парсинга 92 | for k in criteria: 93 | criteria[k]['score'] = 8 94 | criteria[k]['feedback'] = "Хорошее соответствие эталону." 95 | 96 | score = sum(v['score'] * v['weight'] for v in criteria.values()) / total 97 | return {"overall_score": score, "details": criteria, "raw": evaluation_text} 98 | ``` 99 | 100 | Поверх этих базовых механизмов полезно добавлять продвинутые техники: оценивать семантическую близость через эмбеддинги и метрики схожести (а не только поверхностные совпадения), привлекать независимых рецензентов для крауд‑оценки, внедрять автоматические проверки когерентности и логики, а также строить динамические фреймворки оценки, адаптируемые под домен и тип задач. В продакшене особенно важны практики непрерывной оценки: ведение истории версий и метрик, замыкание пользовательской обратной связи на процесс разработки; разнообразие кейсов, включая крайние случаи и культурно‑языковые вариации; работа с экспертами, в том числе слепые оценки для снижения предвзятости; сравнение с альтернативными моделями и применение специализированных «оценщиков» для поиска противоречий и фактических ошибок. Всё это складывается в цикл, где строгие методики соединяются с постоянными итерациями, а рубрикаторы, эталоны, экспертные рецензии и автоматические проверки помогают строить надёжные и этичные системы. 101 | 102 | ## Теоретические вопросы 103 | 1. Зачем оценивать ответы LLM и по каким измерениям это следует делать? 104 | 2. Приведите примеры метрик и объясните их роль в разработке. 105 | 3. Как выглядит итеративный переход от разработки к продакшену? 106 | 4. Почему сценарии высокого риска требуют особой строгости, и приведите примеры таких приложений? 107 | 5. Перечислите лучшие практики для старта, итераций и автотестов. 108 | 6. Как автоматизация тестов помогает в процессе разработки? 109 | 7. Почему метрики следует настраивать под конкретную задачу? 110 | 8. Как построить рубрикатор и протоколы оценки? 111 | 9. Какие продвинутые техники оценки применимы и для чего? 112 | 10. Как непрерывная оценка и широкий охват тест-кейсов повышают надёжность системы? 113 | 114 | ## Практические задания 115 | 1. Напишите функцию, которая читает API-ключ из окружения, запрашивает ответ у LLM и измеряет время выполнения и количество использованных токенов. 
116 | -------------------------------------------------------------------------------- /docs/en/CHAPTER-1/Answers 1.5.md: -------------------------------------------------------------------------------- 1 | # Answers 1.5 2 | 3 | ## Theory 4 | 5 | 1. Prompt chaining decomposes a complex task into sequential, interconnected steps (prompts), each solving a subtask. Unlike the “monolithic” approach, it simplifies and improves control. 6 | 2. Analogies: step‑by‑step cooking of a complex dish; modular development where each module contributes to the final result. 7 | 3. Workflow management in chaining means checkpointing state after each step and adapting the next step to results so far. 8 | 4. Resource savings: each step processes only what’s needed, reducing computation versus one long prompt. 9 | 5. Error reduction: focusing on a single subtask simplifies debugging and enables targeted improvements. 10 | 6. Dynamic information loading matters due to context limits; chaining injects relevant data as needed. 11 | 7. Core steps: task decomposition, state management, prompt design, data loading/pre‑processing, dynamic context injection. 12 | 8. Best practices: avoid unnecessary complexity, write clear prompts, manage external context, aim for efficiency, and test continuously. 13 | 9. The examples use `dotenv` and `openai` for configuration and API calls. 14 | 10. The system message defines structure and format, increasing precision and consistency. 15 | 11. The product database stores details; lookup functions by name or category support effective support answers. 16 | 12. Converting JSON strings to Python objects simplifies downstream processing in chains. 17 | 13. Formatting a user answer from data keeps interactions informative and relevant. 18 | 14. Chaining lets the system move from the initial request to troubleshooting, warranty, and recommendations — covering complex support scenarios. 19 | 20 | ## Practice 21 | 22 | 1. `retrieve_model_response` function: 23 | ```python 24 | from openai import OpenAI 25 | 26 | client = OpenAI() 27 | 28 | def retrieve_model_response(message_sequence, model="gpt-4o-mini", temperature=0, max_tokens=500): 29 | response = client.chat.completions.create( 30 | model=model, 31 | messages=message_sequence, 32 | temperature=temperature, 33 | max_tokens=max_tokens, 34 | ) 35 | return response.choices[0].message.content 36 | ``` 37 | 38 | 2. Extracting products/categories from a request: 39 | ```python 40 | system_instruction = """ 41 | You will receive support requests. The request will be delimited by '####'. 42 | Output a Python list of objects, each representing a product or category mentioned in the request. 43 | """ 44 | 45 | user_query = "#### Tell me about SmartX ProPhone and FotoSnap DSLR Camera, and also your televisions ####" 46 | 47 | message_sequence = [ 48 | {'role': 'system', 'content': system_instruction}, 49 | {'role': 'user', 'content': user_query}, 50 | ] 51 | 52 | extracted_info = retrieve_model_response(message_sequence) 53 | print(extracted_info) 54 | ``` 55 | 56 | 3. 
Product database helpers: 57 | ```python 58 | product_database = { 59 | "SmartX ProPhone": { 60 | "name": "SmartX ProPhone", 61 | "category": "Smartphones and Accessories", 62 | }, 63 | "FotoSnap DSLR Camera": { 64 | "name": "FotoSnap DSLR Camera", 65 | "category": "Cameras & Photography", 66 | }, 67 | "UltraView HD TV": { 68 | "name": "UltraView HD TV", 69 | "category": "Televisions", 70 | }, 71 | } 72 | 73 | def get_product_details_by_name(product_name): 74 | return product_database.get(product_name, "Product not found.") 75 | 76 | def get_products_in_category(category_name): 77 | return [p for p in product_database.values() if p["category"] == category_name] 78 | 79 | print(get_product_details_by_name("SmartX ProPhone")) 80 | print(get_products_in_category("Smartphones and Accessories")) 81 | ``` 82 | 83 | 4. JSON string to list: 84 | ```python 85 | import json 86 | 87 | def json_string_to_python_list(json_string): 88 | try: 89 | return json.loads(json_string) 90 | except json.JSONDecodeError as e: 91 | print(f"JSON decode error: {e}") 92 | return None 93 | 94 | json_input = '[{"category": "Smartphones and Accessories", "products": ["SmartX ProPhone"]}]' 95 | python_list = json_string_to_python_list(json_input) 96 | print(python_list) 97 | ``` 98 | 99 | 5. Generate a user‑facing answer: 100 | ```python 101 | def generate_response_from_data(product_data_list): 102 | if not product_data_list: 103 | return "We couldn't find products matching your request." 104 | 105 | response_string = "" 106 | for product_data in product_data_list: 107 | response_string += f"Product: {product_data['name']}\n" 108 | response_string += f"Category: {product_data['category']}\n\n" 109 | return response_string 110 | 111 | python_list = [{'category': 'Smartphones and Accessories', 'products': ['SmartX ProPhone']}] 112 | final_response = generate_response_from_data(python_list) 113 | print(final_response) 114 | ``` 115 | 116 | 6. End‑to‑end support scenario: describe how the assistant handles an initial product inquiry, troubleshooting, a warranty question, and accessory recommendations using the functions above. 117 | ```python 118 | # 1) Initial product inquiry: extract entities and list details 119 | system_instruction_catalog = """ 120 | You will receive support requests delimited by '####'. 121 | Return a Python list of objects: mentioned products/categories. 122 | """ 123 | 124 | user_query_1 = "#### I'm interested in upgrading my smartphone. What can you tell me about the latest models? ####" 125 | 126 | message_sequence_1 = [ 127 | {'role': 'system', 'content': system_instruction_catalog}, 128 | {'role': 'user', 'content': user_query_1}, 129 | ] 130 | extracted = retrieve_model_response(message_sequence_1) 131 | print("Extracted entities:", extracted) 132 | 133 | # Suppose we parsed 'extracted' to a Python list called parsed_entities (omitted for brevity) 134 | # You could then look up details via your product DB helpers: 135 | # for e in parsed_entities: ... get_product_details_by_name(...), get_products_in_category(...) 136 | 137 | # 2) Troubleshooting: step‑by‑step guidance for a specific product issue 138 | troubleshooting_query = "#### I just bought the FotoSnap DSLR Camera you recommended, but I can't pair it with my smartphone. What should I do? ####" 139 | system_instruction_troubleshooting = "Provide step‑by‑step troubleshooting advice for the customer’s issue." 
140 | message_sequence_2 = [ 141 | {'role': 'system', 'content': system_instruction_troubleshooting}, 142 | {'role': 'user', 'content': troubleshooting_query}, 143 | ] 144 | troubleshooting_response = retrieve_model_response(message_sequence_2) 145 | print("Troubleshooting response:\n", troubleshooting_response) 146 | 147 | # 3) Warranty: clarify coverage details 148 | follow_up_query = "#### Also, could you clarify what the warranty covers for the FotoSnap DSLR Camera? ####" 149 | system_instruction_warranty = "Provide detailed information about the product’s warranty coverage." 150 | message_sequence_3 = [ 151 | {'role': 'system', 'content': system_instruction_warranty}, 152 | {'role': 'user', 'content': follow_up_query}, 153 | ] 154 | warranty_response = retrieve_model_response(message_sequence_3) 155 | print("Warranty response:\n", warranty_response) 156 | 157 | # 4) Recommendations: suggest compatible accessories based on user interest 158 | additional_assistance_query = "#### Given your interest in photography, would you like recommendations for lenses and tripods compatible with the FotoSnap DSLR Camera? ####" 159 | system_instruction_recommendations = "Suggest accessories that complement the user’s existing products." 160 | message_sequence_4 = [ 161 | {'role': 'system', 'content': system_instruction_recommendations}, 162 | {'role': 'user', 'content': additional_assistance_query}, 163 | ] 164 | recommendations_response = retrieve_model_response(message_sequence_4) 165 | print("Accessory recommendations:\n", recommendations_response) 166 | ``` 167 | 168 | This sequence demonstrates a complete, chained workflow where the assistant: 169 | - Extracts mentioned entities and consults a product database. 170 | - Provides step‑wise troubleshooting tailored to the problem. 171 | - Explains warranty coverage clearly and concisely. 172 | - Offers personalized accessory recommendations aligned with the user’s interests. 173 | -------------------------------------------------------------------------------- /docs/CHAPTER-1/Answers 1.3.md: -------------------------------------------------------------------------------- 1 | # Answers 1.3 2 | 3 | ## Theory 4 | 5 | 1. The key steps for integrating the OpenAI Moderation API into a platform include obtaining an API key from OpenAI, incorporating the API into the platform's backend using the OpenAI client library, and integrating it into the content submission workflow for real-time content analysis. 6 | 2. Platforms can customize the OpenAI Moderation API by adjusting the sensitivity of the moderation filter, focusing on specific types of content violations, or incorporating custom blacklists or whitelists to tailor the moderation process to their specific needs. 7 | 3. The OpenAI Moderation API's capabilities can be extended to images and videos by employing additional OpenAI tools or integrating third-party solutions, creating a robust moderation system that ensures all forms of content adhere to safety and appropriateness standards. 8 | 4. Delimiters play a critical role in mitigating prompt injections by clearly separating user commands from system instructions, thus maintaining the integrity of system responses and preventing the system from misinterpreting concatenated inputs as part of its executable commands. 9 | 5. 
Command isolation with delimiters enhances system security against prompt injections by ensuring a clear separation between executable commands and user data, accurately identifying the boundaries of user inputs, and preventing attackers from injecting malicious commands. 10 | 6. Additional strategies to bolster defense against prompt injections include implementing strict input validation rules, operating with the least privilege necessary, defining allowlists for acceptable commands and inputs, employing regular expression checks, and implementing comprehensive monitoring and logging. 11 | 7. Detecting prompt injections through direct evaluation involves the model evaluating user inputs for potential injections and responding with a nuanced understanding of whether an attempt is being made, thereby reducing false positives and enhancing the response mechanism. 12 | 8. Once a potential prompt injection is detected, the system can respond by alerting the user, requesting clarification, isolating the input for human review, or dynamically adjusting sensitivity based on user behavior and context, thereby maintaining user engagement and trust. 13 | 9. The benefits of direct evaluation for injection detection include precision in understanding user inputs, adaptability to evolve with new types of injections, and maintaining a positive user experience. The challenges include the complexity of developing such a model, the need for constant updates, and balancing security with usability. 14 | 10. The integration of OpenAI's APIs and strategic measures against prompt injections significantly contributes to the safety and integrity of user-generated content platforms by providing real-time content analysis, customizing moderation processes, and employing sophisticated strategies against prompt injections, ensuring a positive and compliant user experience. 15 | 16 | ## Practice 17 | 1. 18 | ```python 19 | from openai import OpenAI 20 | 21 | client = OpenAI() 22 | 23 | def moderate_content(content): 24 | response = client.moderations.create(model="omni-moderation-latest", input=content) 25 | return response.results[0].flagged 26 | ``` 27 | 28 | 2. 29 | ```python 30 | def sanitize_delimiter(input_text, delimiter): 31 | return input_text.replace(delimiter, "") 32 | ``` 33 | 34 | 3. 35 | ```python 36 | def validate_input_length(input_text, min_length=1, max_length=200): 37 | return min_length <= len(input_text) <= max_length 38 | ``` 39 | 40 | 4. 41 | ```python 42 | class UserSession: 43 | def __init__(self, user_id): 44 | self.user_id = user_id 45 | self.trust_level = 0 # Initialize trust level at 0 46 | self.sensitivity_level = 5 # Initialize sensitivity level at 5 47 | 48 | def adjust_sensitivity(self): 49 | # Adjust sensitivity based on trust level 50 | if self.trust_level > 5: 51 | self.sensitivity_level = max(1, self.sensitivity_level - 1) 52 | else: 53 | self.sensitivity_level = min(10, self.sensitivity_level + 1) 54 | 55 | def evaluate_input(self, user_input): 56 | # Simple heuristic for demonstration: consider input dangerous if it contains certain keywords 57 | dangerous_keywords = ["exec", "delete", "drop"] 58 | return any(keyword in user_input.lower() for keyword in dangerous_keywords) 59 | 60 | def handle_input(self, user_input): 61 | if self.evaluate_input(user_input): 62 | if self.trust_level < 5: 63 | print("Your input has been flagged for review by our security team.") 64 | else: 65 | print("Your input seems suspicious. 
Could you rephrase it or clarify your intention?") 66 | else: 67 | print("Input accepted. Thank you!") 68 | print("Remember: Always ensure your inputs are clear and do not contain commands that could be harmful or misunderstood.") 69 | ``` 70 | 71 | 5. 72 | ```python 73 | def direct_evaluation_for_injection(user_input): 74 | # Mock evaluation logic for detecting prompt injections 75 | if "ignore instructions" in user_input.lower() or "disregard previous guidelines" in user_input.lower(): 76 | return 'Y' # Injection attempt detected 77 | return 'N' # No injection attempt detected 78 | ``` 79 | 80 | 6. 81 | ```python 82 | from openai import OpenAI 83 | 84 | client = OpenAI() 85 | 86 | # Function from Task 1: Moderate a single piece of content 87 | def moderate_content(content): 88 | response = client.moderations.create(model="omni-moderation-latest", input=content) 89 | return response.results[0].flagged 90 | 91 | # Function from Task 2: Sanitize delimiter from the input 92 | def sanitize_delimiter(input_text, delimiter): 93 | return input_text.replace(delimiter, "") 94 | 95 | # Function from Task 3: Validate input length 96 | def validate_input_length(input_text, min_length=1, max_length=200): 97 | return min_length <= len(input_text) <= max_length 98 | 99 | # Class from Task 4: UserSession with methods for handling user input 100 | class UserSession: 101 | def __init__(self, user_id): 102 | self.user_id = user_id 103 | self.trust_level = 0 104 | self.sensitivity_level = 5 105 | 106 | def adjust_sensitivity(self): 107 | if self.trust_level > 5: 108 | self.sensitivity_level = max(1, self.sensitivity_level - 1) 109 | else: 110 | self.sensitivity_level = min(10, self.sensitivity_level + 1) 111 | 112 | def evaluate_input(self, user_input): 113 | dangerous_keywords = ["exec", "delete", "drop"] 114 | return any(keyword in user_input.lower() for keyword in dangerous_keywords) 115 | 116 | def handle_input(self, user_input): 117 | if self.evaluate_input(user_input): 118 | if self.trust_level < 5: 119 | print("Your input has been flagged for review by our security team.") 120 | else: 121 | print("Your input seems suspicious. Could you rephrase it or clarify your intention?") 122 | else: 123 | print("Input accepted. Thank you!") 124 | print("Remember: Always ensure your inputs are clear and do not contain commands that could be harmful or misunderstood.") 125 | 126 | # Function from Task 5: Direct evaluation for injection detection 127 | def direct_evaluation_for_injection(user_input): 128 | if "ignore instructions" in user_input.lower() or "disregard previous guidelines" in user_input.lower(): 129 | return 'Y' 130 | return 'N' 131 | 132 | # Main workflow integration 133 | if __name__ == "__main__": 134 | # Initialize a UserSession instance 135 | session = UserSession(user_id=1) 136 | 137 | while True: 138 | user_input = input("Enter your content (or type 'exit' to quit): ") 139 | if user_input.lower() == 'exit': 140 | break 141 | 142 | # Sanitize input 143 | user_input = sanitize_delimiter(user_input, "####") 144 | 145 | # Validate input length 146 | if not validate_input_length(user_input): 147 | print("Input is either too short or too long. Please try again.") 148 | continue 149 | 150 | # Moderate content 151 | if moderate_content(user_input): 152 | print("Content flagged as inappropriate. 
Please review your content.") 153 | continue 154 | 155 | # Direct evaluation for injection 156 | injection_attempt = direct_evaluation_for_injection(user_input) 157 | if injection_attempt == 'Y': 158 | print("Suspicious content detected. Please ensure your content adheres to our guidelines.") 159 | continue 160 | 161 | # Handle input normally 162 | session.handle_input(user_input) 163 | ``` --------------------------------------------------------------------------------