├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ ├── feature_request.yml │ └── improve_existing_docs.yml └── workflows │ ├── python-package.yml │ └── release.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENCE ├── Makefile ├── README.md ├── SECURITY.md ├── app.py ├── example.ipynb ├── examples └── files │ └── facts.txt ├── megabots ├── __init__.py ├── api.py ├── bot.py ├── memory.py ├── prompt.py ├── utils.py └── vectorstore.py ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── test_api.py ├── test_bots.py ├── test_memory.py └── test_ui.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Ignore Jupyter notebooks from Git stats 2 | *.ipynb linguist-documentation -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report to help us improve 3 | labels: ['bug', 'triage'] 4 | assignees: 5 | - momegas 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: '## :beetle: Bug Report :beetle:' 10 | - type: input 11 | id: title 12 | attributes: 13 | label: 'Title' 14 | description: 'Provide a brief, clear title for the bug report' 15 | placeholder: 'Example: Unexpected behavior when clicking "Submit"' 16 | validations: 17 | required: true 18 | - type: textarea 19 | id: description 20 | attributes: 21 | label: 'Description' 22 | description: 'Please provide a clear and concise description of the bug' 23 | placeholder: 'When I click "Submit", the page refreshes instead of showing a confirmation message' 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: steps 28 | attributes: 29 | label: 'Steps to Reproduce' 30 | description: 'Provide the steps to reproduce the bug' 31 | placeholder: | 32 | 1. Go to '...' 33 | 2. Click on '....' 
34 | 3. Scroll down to '....' 35 | 4. See the error 36 | validations: 37 | required: true 38 | - type: textarea 39 | id: expected 40 | attributes: 41 | label: 'Expected Behavior' 42 | description: 'Please describe the expected behavior' 43 | placeholder: 'After clicking "Submit", a confirmation message should appear' 44 | validations: 45 | required: true 46 | - type: textarea 47 | id: actual 48 | attributes: 49 | label: 'Actual Behavior' 50 | description: 'Please describe the actual behavior that you experienced' 51 | placeholder: 'The page refreshes without showing a confirmation message' 52 | validations: 53 | required: true 54 | - type: textarea 55 | id: additional-context 56 | attributes: 57 | label: 'Additional Context' 58 | description: 'Provide any additional context or information that may help diagnose the issue (e.g., browser version, operating system, etc.)' 59 | placeholder: 'The issue occurs in Chrome 96.0.4664.93 on macOS 11.6.1' 60 | validations: 61 | required: false 62 | - type: checkboxes 63 | id: python-version 64 | attributes: 65 | label: 'Python Version' 66 | description: 'Select the Python version(s) affected by this bug' 67 | options: 68 | - label: 'Python <= 3.9' 69 | - label: 'Python 3.10' 70 | - label: 'Python 3.11' 71 | validations: 72 | required: true 73 | - type: checkboxes 74 | id: acknowledgements 75 | attributes: 76 | label: 'Acknowledgements' 77 | description: 'Please confirm the following:' 78 | options: 79 | - label: 'I have searched the existing issues to make sure this bug has not been reported yet' 80 | required: true 81 | - label: 'I am using the latest version of the software' 82 | required: true 83 | - label: 'I have provided enough information for the maintainers to reproduce and diagnose the issue' 84 | required: true 85 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | 
blank_issues_enabled: false 2 | contact_links: 3 | - name: Ask a question 4 | url: https://github.com/momegas/megabots/discussions/categories/q-a 5 | about: Ask questions and discuss with other community members 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Suggest a new feature or enhancement for this project 3 | labels: ['enhancement'] 4 | assignees: [momegas] 5 | 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: '## :sparkles: Feature Request :sparkles:' 10 | - type: input 11 | id: title 12 | attributes: 13 | label: Title 14 | description: 'Provide a brief, clear title for the feature request' 15 | placeholder: 'Example: Add a "Dark Mode" theme option' 16 | validations: 17 | required: true 18 | - type: textarea 19 | id: problem 20 | attributes: 21 | label: Problem Description 22 | description: 'Describe the problem you are trying to solve or the limitation you are facing' 23 | placeholder: 'I often work at night and find the bright interface hard on my eyes / if there is an open issue please link it with #number' 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: solution 28 | attributes: 29 | label: Proposed Solution 30 | description: 'Describe the solution you would like to see implemented' 31 | placeholder: 'Add a "Dark Mode" theme option in the settings that applies a dark color scheme to the interface' 32 | validations: 33 | required: true 34 | - type: textarea 35 | id: alternatives 36 | attributes: 37 | label: Alternatives Considered 38 | description: 'List any alternative solutions or features you have considered' 39 | placeholder: 'I have tried using browser extensions to apply a dark theme, but they do not work well with this application' 40 | validations: 41 | required: false 42 | - type: textarea 43 | id: 
additional-context 44 | attributes: 45 | label: Additional Context 46 | description: 'Provide any additional context or information about the feature request (e.g., screenshots, mockups, etc.)' 47 | placeholder: 'Here is a link to a sample "Dark Mode" color scheme I found online: https://example.com/dark-theme' 48 | validations: 49 | required: false 50 | - type: checkboxes 51 | id: acknowledgements 52 | attributes: 53 | label: Acknowledgements 54 | description: 'Please confirm the following:' 55 | options: 56 | - label: 'I have searched the existing issues to make sure this feature has not been requested yet' 57 | required: true 58 | - label: 'I have provided enough information for the maintainers to understand and evaluate the feature request' 59 | required: true 60 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/improve_existing_docs.yml: -------------------------------------------------------------------------------- 1 | name: Improve existing content 2 | description: Make a suggestion to improve the content in an existing article. 3 | labels: 4 | - content 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | * For questions, ask in [Discussions](https://github.com/momegas/megabots/discussions/categories/q-a). 10 | * Before you file an issue read the [Contributing guide](https://github.com/momegas/megabots/blob/docs/CONTRIBUTING.md). 11 | * Check to make sure someone hasn't already opened a similar [issue](https://github.com/momegas/megabots/issues). 12 | 13 | - type: checkboxes 14 | id: terms 15 | attributes: 16 | label: Code of Conduct 17 | description: This project has a Code of Conduct that all participants are expected to understand and follow. 
18 | options: 19 | - label: I have read and agree to the projects [Code of Conduct](https://github.com/momegas/megabots/blob/main/CODE_OF_CONDUCT.md) 20 | required: true 21 | 22 | - type: textarea 23 | attributes: 24 | label: What article is affected? 25 | description: Please link to the article you'd like to see updated. 26 | validations: 27 | required: true 28 | 29 | - type: textarea 30 | attributes: 31 | label: What part(s) of the article would you like to see updated? 32 | description: | 33 | - Give as much detail as you can to help us understand the change you want to see. 34 | - Why should the docs be changed? What use cases does it support? 35 | - What is the expected outcome? 36 | validations: 37 | required: true 38 | 39 | - type: textarea 40 | attributes: 41 | label: Additional information 42 | description: Add any other context or screenshots about the feature request for the documentation here. 43 | validations: 44 | required: false 45 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | pull_request: 7 | branches: ["main"] 8 | 9 | jobs: 10 | build_and_test: 11 | runs-on: ubuntu-latest 12 | environment: Development 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python-version: ["3.10", "3.11"] 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v3 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 28 | - name: Test with pytest 29 | run: | 30 | export OPENAI_API_KEY=${{secrets.OPENAI_API_KEY}} 31 | pytest ./tests 32 | 
-------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release PyPI Package 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | release_package: 9 | permissions: 10 | contents: write 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v2 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: Set up Python 3.11.3 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: 3.11.3 22 | 23 | - name: Build a binary wheel and a source tarball. 24 | run: pip install wheel && python setup.py sdist bdist_wheel 25 | 26 | - name: Publish distribution 📦 to PyPI 27 | if: startsWith(github.ref, 'refs/tags') 28 | uses: pypa/gh-action-pypi-publish@release/v1 29 | with: 30 | password: ${{ secrets.PYPI_API_TOKEN }} 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # General 2 | __pycache__ 3 | .pytest_cache 4 | **.egg-info 5 | dist 6 | build 7 | **.pickle 8 | **.pkl 9 | volumes 10 | docker-compose.yml 11 | 12 | # Environments 13 | .venv 14 | .env 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Jupyter Notebook checkpoints 42 | .ipynb_checkpoints -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as 
members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 
45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | megaklis.vasilakis@gmail.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. 
No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 
129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | First of all, thank you for your interest in contributing! We appreciate your time and effort, and we value your contributions to make this project better. This document will provide you with the information you need to start contributing. 4 | 5 | ## How to Get Started 6 | 7 | 1. Clone the repository and create a new branch 8 | 2. Make your changes 9 | 3. Submit a pull request 10 | 4. Wait for a review 11 | 5. Tada! You're done! 12 | 13 | ## How to Report a Bug 14 | 15 | If you find a bug, please file an issue on the using the bug report template. 16 | 17 | ## How to Suggest a Feature or Enhancement 18 | 19 | If you have an idea for a new feature or enhancement, please file an issue on the using the feature request template. 20 | 21 | 🙏 Thank you 22 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Megaklis Vasilakis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Define variables 2 | PYTHON=python 3 | PIP=pip 4 | PACKAGE=megabots 5 | 6 | .PHONY: install test clean build publish 7 | 8 | install: 9 | $(PIP) install -r requirements.txt 10 | 11 | test: 12 | $(PYTHON) -m pytest ./tests 13 | 14 | clean: 15 | rm -rf build dist *.egg-info .pytest_cache ./**/__pycache__ 16 | 17 | build: 18 | $(PYTHON) setup.py sdist bdist_wheel 19 | 20 | publish: clean build 21 | $(PYTHON) -m twine upload dist/* 22 | 23 | trace: 24 | langchain-server 25 | 26 | freeze: 27 | $(PIP) freeze > requirements.txt 28 | 29 | gradio: 30 | gradio app.py 31 | 32 | help: 33 | @echo "install - install dependencies" 34 | @echo "test - run tests" 35 | @echo "clean - remove build artifacts" 36 | @echo "build - build package" 37 | @echo "publish - publish package to PyPI" 38 | @echo "help - show this help message" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🤖 Megabots 2 | 3 | [![Tests](https://github.com/momegas/qnabot/actions/workflows/python-package.yml/badge.svg)](https://github.com/momegas/qnabot/actions/workflows/python-package.yml) 4 | [![Python Version](https://img.shields.io/badge/python-%203.10%20-blue.svg)](#supported-python-versions) 5 | [![Code style: 
black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 6 | [![License](https://img.shields.io/badge/License-MIT-informational.svg)](https://github.com/momegas/megabots/blob/main/LICENCE) 7 | ![](https://dcbadge.vercel.app/api/server/zkqDWk5S7P?style=flat&n&compact=true) 8 | 9 | 🤖 Megabots provides State-of-the-art, production ready LLM apps made mega-easy, so you don't have to build them from scratch 🤯 Create a bot, now 🫵 10 | 11 | - 👉 Join us on Discord: https://discord.gg/zkqDWk5S7P 12 | - ✈️ Work is managed in this project: https://github.com/users/momegas/projects/5/views/2 13 | - 🤖 Documentation bot: https://huggingface.co/spaces/momegas/megabots 14 | 15 | **The Megabots library can be used to create bots that:** 16 | 17 | - ⌚️ are production ready, in minutes 18 | - 🗂️ can answer questions over documents 19 | - 💾 can connect to vector databases 20 | - 🎖️ automatically expose the bot as a rebust API using FastAPI (early release) 21 | - 🏓 automatically expose the bot as a UI using Gradio 22 | 23 | 🤖 Megabots is backed by some of the most famous tools for productionalising AI. It uses [LangChain](https://docs.langchain.com/docs/) for managing LLM chains, [langchain-serve](https://github.com/jina-ai/langchain-serve) to create a production ready API, [Gradio](https://gradio.app/) to create a UI. At the moment it uses [OpenAI](https://openai.com/) to generate answers, but we plan to support other LLMs in the future. 24 | 25 | ## Getting started 26 | 27 | Note: This is a work in progress. The API might change. 28 | 29 | ```bash 30 | pip install megabots 31 | ``` 32 | 33 | ```python 34 | from megabots import bot 35 | import os 36 | 37 | os.environ["OPENAI_API_KEY"] = "my key" 38 | 39 | # Create a bot 👉 with one line of code. Automatically loads your data from ./index or index.pkl. 40 | # Keep in mind that you need to have one or another. 
41 | qnabot = bot("qna-over-docs") 42 | 43 | # Ask a question 44 | answer = qnabot.ask("How do I use this bot?") 45 | 46 | # Save the index to save costs (GPT is used to create the index) 47 | qnabot.save_index("index.pkl") 48 | 49 | # Load the index from a previous run 50 | qnabot = bot("qna-over-docs", index="./index.pkl") 51 | 52 | # Or create the index from a directory of documents 53 | qnabot = bot("qna-over-docs", index="./index") 54 | 55 | # Change the model 56 | qnabot = bot("qna-over-docs", model="text-davinci-003") 57 | ``` 58 | 59 | ## Changing the bot's prompt 60 | 61 | You can change the bots promnpt to customize it to your needs. In the `qna-over-docs` type of bot you will need to pass 2 variables for the `context` (knwoledge searched from the index) and the `question` (the human question). 62 | 63 | ```python 64 | from megabots import bot 65 | 66 | prompt = """ 67 | Use the following pieces of context to answer the question at the end. 68 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 69 | Answer in the style of Tony Stark. 70 | 71 | {context} 72 | 73 | Question: {question} 74 | Helpful humorous answer:""" 75 | 76 | qnabot = bot("qna-over-docs", index="./index.pkl", prompt=prompt) 77 | 78 | qnabot.ask("what was the first roster of the avengers?") 79 | ``` 80 | 81 | ## Working with memory 82 | 83 | You can easily add memory to your `bot` using the `memory` parameter. It accepts a string with the type of the memory to be used. This defaults to some sane dafaults. 84 | Should you need more configuration, you can use the `memory` function and pass the type of memory and the configuration you need. 85 | 86 | ```python 87 | from megabots import bot 88 | 89 | qnabot = bot("qna-over-docs", index="./index.pkl", memory="conversation-buffer") 90 | 91 | print(qnabot.ask("who is iron man?")) 92 | print(qnabot.ask("was he in the first roster?")) 93 | # Bot should understand who "he" refers to. 
94 | ``` 95 | 96 | Or using the `memory` factory function 97 | 98 | ```python 99 | from megabots import bot, memory 100 | 101 | mem = memory("conversation-buffer-window", k=5) 102 | 103 | qnabot = bot("qna-over-docs", index="./index.pkl", memory=mem) 104 | 105 | print(qnabot.ask("who is iron man?")) 106 | print(qnabot.ask("was he in the first roster?")) 107 | ``` 108 | 109 | NOTE: For the `qna-over-docs` bot, when using memory and passing your custom prompt, it is important to remember to pass one more variable to your custom prompt to accommodate chat history. The variable name is `history`. 110 | 111 | ```python 112 | from megabots import bot 113 | 114 | prompt = """ 115 | Use the following pieces of context to answer the question at the end. 116 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 117 | 118 | {context} 119 | 120 | {history} 121 | Human: {question} 122 | AI:""" 123 | 124 | qnabot = bot("qna-over-docs", prompt=prompt, index="./index.pkl", memory="conversation-buffer") 125 | 126 | print(qnabot.ask("who is iron man?")) 127 | print(qnabot.ask("was he in the first roster?")) 128 | ``` 129 | 130 | ## Using Megabots with Milvus (more DBs coming soon) 131 | 132 | Megabots `bot` can also use Milvus as a backend for its search engine. You can find an example of how to do it below. 133 | 134 | In order to run Milvus you need to follow [this guide](https://milvus.io/docs/example_code.md) to download a docker compose file and run it. 135 | The command is: 136 | 137 | ```bash 138 | wget https://raw.githubusercontent.com/milvus-io/pymilvus/v2.2.7/examples/hello_milvus.py 139 | ``` 140 | 141 | You can then [install Attu](https://milvus.io/docs/attu_install-docker.md) as a management tool for Milvus 142 | 143 | ```python 144 | from megabots import bot 145 | 146 | # Attach a vectorstore by passing the name of the database. 
Default port for milvus is 19530 and default host is localhost 147 | # Point it to your files directory so that it can index the files and add them to the vectorstore 148 | bot = bot("qna-over-docs", index="./examples/files/", vectorstore="milvus") 149 | 150 | bot.ask("what was the first roster of the avengers?") 151 | ``` 152 | 153 | Or use the `vectorstore` factory function for more customisation 154 | 155 | ```python 156 | 157 | from megabots import bot, vectorstore 158 | 159 | milvus = vectorstore("milvus", host="localhost", port=19530) 160 | 161 | bot = bot("qna-over-docs", index="./examples/files/", vectorstore=milvus) 162 | ``` 163 | 164 | ## Exposing an API with [langchain-serve](https://github.com/jina-ai/langchain-serve) 165 | 166 | You can also expose the bot endpoints locally using langchain-serve. A sample file `api.py` is provided in the `megabots` folder. 167 | 168 | To expose the API locally, you can do 169 | ```bash 170 | lc-serve deploy local megabots.api 171 | ``` 172 | 173 | You should then be able to visit `http://localhost:8000/docs` to see & interact with the API documentation. 174 | 175 | To deploy your API to the cloud, you can do and connect to the API using the endpoint provided in the output. 176 | ```bash 177 | lc-serve deploy jcloud megabots.api 178 | ``` 179 | 180 |
181 | Show command output 182 | 183 | ```text 184 | ╭──────────────┬──────────────────────────────────────────────────────────────────────────────────────╮ 185 | │ App ID │ langchain-dec14439a6 │ 186 | ├──────────────┼──────────────────────────────────────────────────────────────────────────────────────┤ 187 | │ Phase │ Serving │ 188 | ├──────────────┼──────────────────────────────────────────────────────────────────────────────────────┤ 189 | │ Endpoint │ https://langchain-dec14439a6.wolf.jina.ai │ 190 | ├──────────────┼──────────────────────────────────────────────────────────────────────────────────────┤ 191 | │ App logs │ dashboards.wolf.jina.ai │ 192 | ├──────────────┼──────────────────────────────────────────────────────────────────────────────────────┤ 193 | │ Swagger UI │ https://langchain-dec14439a6.wolf.jina.ai/docs │ 194 | ├──────────────┼──────────────────────────────────────────────────────────────────────────────────────┤ 195 | │ OpenAPI JSON │ https://langchain-dec14439a6.wolf.jina.ai/openapi.json │ 196 | ╰──────────────┴──────────────────────────────────────────────────────────────────────────────────────╯ 197 | ``` 198 |
199 | 200 | 201 | You can read more about langchain-serve [here](https://github.com/jina-ai/langchain-server). 202 | 203 | ## Exposing a Gradio chat-like interface 204 | 205 | You can expose a gradio UI for the bot using `create_interface` function. 206 | Assuming your file is called `ui.py` run `gradio qnabot/ui.py` to run the UI locally. 207 | You should then be able to visit `http://127.0.0.1:7860` to see the API documentation. 208 | 209 | ```python 210 | from megabots import bot, create_interface 211 | 212 | demo = create_interface(bot("qna-over-docs")) 213 | ``` 214 | 215 | ## Customising bot 216 | 217 | The `bot` function should serve as the starting point for creating and customising your bot. Below is a list of the available arguments in `bot`. 218 | 219 | | Argument | Description | 220 | | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 221 | | task | The type of bot to create. Available options: `qna-over-docs`. More comming soon | 222 | | index | Specifies the index to use for the bot. It can either be a saved index file (e.g., `index.pkl`) or a directory of documents (e.g., `./index`). In the case of the directory the index will be automatically created. If no index is specified `bot` will look for `index.pkl` or `./index` | 223 | | model | The name of the model to use for the bot. You can specify a different model by providing its name, like "text-davinci-003". Supported models: `gpt-3.5-turbo` (default),`text-davinci-003` More comming soon. | 224 | | prompt | A string template for the prompt, which defines the format of the question and context passed to the model. The template should include placeholder variables like so: `context`, `{question}` and in the case of using memory `history`. 
| 225 | | memory | The type of memory to be used by the bot. Can be a string with the type of the memory or you can use `memory` factory function. Supported memories: `conversation-buffer`, `conversation-buffer-window` | 226 | | vectorstore | The vectorstore to be used for the index. Can be a string with the name of the databse or you can use `vectorstore` factory function. Supported DBs: `milvus`. | 227 | 228 | | sources | When `sources` is `True` the bot will also include sources in the response. A known [issue](https://github.com/hwchase17/langchain/issues/2858) exists, where if you pass a custom prompt with sources the code breaks. | 229 | 230 | ## How QnA bot works 231 | 232 | Large language models (LLMs) are powerful, but they can't answer questions about documents they haven't seen. If you want to use an LLM to answer questions about documents it was not trained on, you have to give it information about those documents. To solve this, we use "retrieval augmented generation." 233 | 234 | In simple terms, when you have a question, you first search for relevant documents. Then, you give the documents and the question to the language model to generate an answer. To make this work, you need your documents in a searchable format (an index). This process involves two main steps: (1) preparing your documents for easy querying, and (2) using the retrieval augmented generation method. 235 | 236 | `qna-over-docs` uses FAISS to create an index of documents and GPT to generate answers. 
237 | 238 | ```mermaid 239 | sequenceDiagram 240 | actor User 241 | participant API 242 | participant LLM 243 | participant Vectorstore 244 | participant IngestionEngine 245 | participant DataLake 246 | autonumber 247 | 248 | Note over API, DataLake: Ingestion phase 249 | loop Every X time 250 | IngestionEngine ->> DataLake: Load documents 251 | DataLake -->> IngestionEngine: Return data 252 | IngestionEngine -->> IngestionEngine: Split documents and Create embeddings 253 | IngestionEngine ->> Vectorstore: Store documents and embeddings 254 | end 255 | 256 | Note over API, DataLake: Generation phase 257 | 258 | User ->> API: Receive user question 259 | API ->> Vectorstore: Lookup documents in the index relevant to the question 260 | API ->> API: Construct a prompt from the question and any relevant documents 261 | API ->> LLM: Pass the prompt to the model 262 | LLM -->> API: Get response from model 263 | API -->> User: Return response 264 | 265 | ``` 266 | 267 | ## How to contribute? 268 | 269 | We welcome any suggestions, problem reports, and contributions! 270 | For any changes you would like to make to this project, we invite you to submit an [issue](https://github.com/momegas/megabots/issues). 271 | 272 | For more information, see [`CONTRIBUTING`](https://github.com/momegas/megabots/blob/main/CONTRIBUTING.md) instructions. 273 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Use this section to tell people about which versions of your project are 6 | currently being supported with security updates. 7 | 8 | | Version | Supported | 9 | | ------- | ------------------ | 10 | | 0.0.x | :white_check_mark: | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Use this section to tell people how to report a vulnerability. 
15 | 16 | To report a vulnerability use [this link](https://github.com/momegas/megabots/issues/new/choose) to open a new bug 17 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is an example of what you can build with 🤖Megabots. 3 | It is hosted here: https://huggingface.co/spaces/momegas/megabots 4 | 5 | """ 6 | 7 | from megabots import bot, create_interface 8 | 9 | prompt = """ 10 | You are programming assistant that helps programmers develop apps with the Megabots library. 11 | Use the following pieces of context to answer the question at the end. 12 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 13 | If the question asks for python code you can provide it. 14 | 15 | Context: 16 | {context} 17 | 18 | Conversation history: 19 | {history} 20 | Human: {question} 21 | AI: 22 | """ 23 | 24 | qnabot = bot( 25 | "qna-over-docs", 26 | index="./examples/files", 27 | memory="conversation-buffer-window", 28 | prompt=prompt, 29 | ) 30 | 31 | 32 | text = """ 33 | You can ask this bot anything about 🤖Megabots. Here are some examples: 34 | - What is Megabots? 35 | - How can I create a bot? 36 | - How can I change the prompt? 37 | - How can I create a bot that has memory and can connect to a milvus vector database? 38 | - How can I customise the bot function? 39 | - How can I an API out of my bot? 40 | - How can I an intrface out of my bot? 41 | - Where can i find the megabots repo? 
42 | """ 43 | 44 | iface = create_interface(qnabot, text) 45 | iface.launch() 46 | -------------------------------------------------------------------------------- /example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Examples\n", 9 | "\n", 10 | "Below you can find some examples of how to use the 🤖 `Megabots` library.\n" 11 | ] 12 | }, 13 | { 14 | "attachments": {}, 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Creating a bot\n", 19 | "\n", 20 | "The `bot` object is the main object of the library. It is used to create a bot and to interact with it.\n", 21 | "\n", 22 | "The `index` argument specifies the index to use for the bot. It can either be a saved index file (e.g., `index.pkl`) or a directory of documents (e.g., `./index`). In the case of the directory the index will be automatically created. If no index is specified the `bot` will look for `index.pkl` or `./index`." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stderr", 32 | "output_type": "stream", 33 | "text": [ 34 | "/Users/momegas/Desktop/qnabot/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 35 | " from .autonotebook import tqdm as notebook_tqdm\n" 36 | ] 37 | }, 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "Using model: gpt-3.5-turbo\n", 43 | "Loading path from pickle file: ./index.pkl ...\n" 44 | ] 45 | }, 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'" 50 | ] 51 | }, 52 | "execution_count": 1, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "from megabots import bot\n", 59 | "\n", 60 | "qnabot = bot(\"qna-over-docs\", index=\"./index.pkl\")\n", 61 | "\n", 62 | "qnabot.ask(\"what was the first roster of the avengers?\")\n" 63 | ] 64 | }, 65 | { 66 | "attachments": {}, 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Changing the bot's prompt\n", 71 | "\n", 72 | "You can change the bot's promnpt to customize it to your needs. In the `qna-over-docs` type of bot you will need to pass 2 variables for the `context` (knwoledge searched from the index) and the `question` (the human question).\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 2, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "Using model: gpt-3.5-turbo\n", 85 | "Loading path from pickle file: ./index.pkl ...\n" 86 | ] 87 | }, 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'" 92 | ] 93 | }, 94 | "execution_count": 2, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "from megabots import bot\n", 101 | "\n", 102 | "prompt = \"\"\"\n", 103 | "Use the following pieces of context to answer the question at the end. 
\n", 104 | "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", 105 | "Answer in the style of Tony Stark.\n", 106 | "\n", 107 | "{context}\n", 108 | "\n", 109 | "Question: {question}\n", 110 | "Helpful humorous answer:\"\"\"\n", 111 | "\n", 112 | "qnabot = bot(\"qna-over-docs\", index=\"./index.pkl\", prompt=prompt)\n", 113 | "\n", 114 | "qnabot.ask(\"what was the first roster of the avengers?\")\n" 115 | ] 116 | }, 117 | { 118 | "attachments": {}, 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "### Using Megabots with Milvus\n", 123 | "\n", 124 | "Megabots `bot` can also use Milvus as a backend for its search engine. You can find an example of how to do it below.\n", 125 | "\n", 126 | "In order to run Milvus you need to follow [this guide](https://milvus.io/docs/example_code.md) to download a docker compose file and run it.\n", 127 | "The command is:\n", 128 | "\n", 129 | "```bash\n", 130 | "wget https://raw.githubusercontent.com/milvus-io/pymilvus/v2.2.7/examples/hello_milvus.py\n", 131 | "```\n", 132 | "\n", 133 | "You can then [install Attu](https://milvus.io/docs/attu_install-docker.md) as a management tool for Milvus\n" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 3, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "Using model: gpt-3.5-turbo\n" 146 | ] 147 | }, 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'" 152 | ] 153 | }, 154 | "execution_count": 3, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "from megabots import bot\n", 161 | "\n", 162 | "# Attach a vectorstore by passing the name of the database. 
Default port for milvus is 19530 and default host is localhost\n", 163 | "# Point it to your files directory so that it can index the files and add them to the vectorstore\n", 164 | "bot = bot(\"qna-over-docs\", index=\"./examples/files/\", vectorstore=\"milvus\")\n", 165 | "\n", 166 | "bot.ask(\"what was the first roster of the avengers?\")\n" 167 | ] 168 | }, 169 | { 170 | "attachments": {}, 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "Or use the `vectorstore` factory function for more customisation\n" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 4, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "name": "stdout", 184 | "output_type": "stream", 185 | "text": [ 186 | "Using model: gpt-3.5-turbo\n" 187 | ] 188 | } 189 | ], 190 | "source": [ 191 | "from megabots import bot, vectorstore\n", 192 | "\n", 193 | "milvus = vectorstore(\"milvus\", host=\"localhost\", port=19530)\n", 194 | "\n", 195 | "bot = bot(\"qna-over-docs\", index=\"./examples/files/\", vectorstore=milvus)\n" 196 | ] 197 | }, 198 | { 199 | "attachments": {}, 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "### Working with memory\n", 204 | "\n", 205 | "You can easily add memory to your `bot` using the `memory` parameter. It accepts a string with the type of the memory to be used. This defaults to some sane dafaults.\n", 206 | "Should you need more configuration, you can use the `memory` function and pass the type of memory and the configuration you need.\n" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 5, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "name": "stdout", 216 | "output_type": "stream", 217 | "text": [ 218 | "Using model: gpt-3.5-turbo\n", 219 | "Loading path from pickle file: ./index.pkl ...\n", 220 | "Iron Man is a superhero character who is a member of the Avengers. 
He is known for his high-tech suit of armor and his alter ego, Tony Stark.\n", 221 | "Yes, Iron Man was part of the original Avengers lineup.\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "from megabots import bot\n", 227 | "\n", 228 | "qnabot = bot(\"qna-over-docs\", index=\"./index.pkl\", memory=\"conversation-buffer\")\n", 229 | "\n", 230 | "print(qnabot.ask(\"who is iron man?\"))\n", 231 | "print(qnabot.ask(\"was he in the first roster?\"))\n" 232 | ] 233 | }, 234 | { 235 | "attachments": {}, 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "Or using the `memory`factory function" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 6, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "name": "stdout", 249 | "output_type": "stream", 250 | "text": [ 251 | "Using model: gpt-3.5-turbo\n", 252 | "Loading path from pickle file: ./index.pkl ...\n", 253 | "Iron Man is a superhero character who is a member of the Avengers. He is known for his high-tech suit of armor and his alter ego, Tony Stark.\n", 254 | "Yes, Iron Man was part of the original Avengers lineup.\n" 255 | ] 256 | } 257 | ], 258 | "source": [ 259 | "from megabots import bot, memory\n", 260 | "\n", 261 | "qnabot = bot(\n", 262 | " \"qna-over-docs\",\n", 263 | " index=\"./index.pkl\",\n", 264 | " memory=memory(\"conversation-buffer-window\", k=5),\n", 265 | ")\n", 266 | "\n", 267 | "print(qnabot.ask(\"who is iron man?\"))\n", 268 | "print(qnabot.ask(\"was he in the first roster?\"))\n" 269 | ] 270 | }, 271 | { 272 | "attachments": {}, 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "NOTE: For the `qna-over-docs` bot, when using memory and passing your custom prompt, it is important to remember to pass one more variable to your custom prompt to facilitate for chat history. 
The variable name is `history`.\n" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 7, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "Using model: gpt-3.5-turbo\n", 289 | "Loading path from pickle file: ./index.pkl ...\n", 290 | "Iron Man is a superhero character who is a member of the Avengers. He is a wealthy businessman named Tony Stark who uses his advanced technology to create a suit of armor that gives him superhuman abilities.\n", 291 | "Yes, Iron Man was part of the original Avengers lineup.\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "from megabots import bot\n", 297 | "\n", 298 | "prompt = \"\"\"\n", 299 | "Use the following pieces of context to answer the question at the end. \n", 300 | "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", 301 | "\n", 302 | "{context}\n", 303 | "\n", 304 | "{history}\n", 305 | "Human: {question}\n", 306 | "AI:\"\"\"\n", 307 | "\n", 308 | "qnabot = bot(\n", 309 | " \"qna-over-docs\",\n", 310 | " prompt=prompt,\n", 311 | " index=\"./index.pkl\",\n", 312 | " memory=\"conversation-buffer\",\n", 313 | ")\n", 314 | "\n", 315 | "print(qnabot.ask(\"who is iron man?\"))\n", 316 | "print(qnabot.ask(\"was he in the first roster?\"))" 317 | ] 318 | } 319 | ], 320 | "metadata": { 321 | "kernelspec": { 322 | "display_name": ".venv", 323 | "language": "python", 324 | "name": "python3" 325 | }, 326 | "language_info": { 327 | "codemirror_mode": { 328 | "name": "ipython", 329 | "version": 3 330 | }, 331 | "file_extension": ".py", 332 | "mimetype": "text/x-python", 333 | "name": "python", 334 | "nbconvert_exporter": "python", 335 | "pygments_lexer": "ipython3", 336 | "version": "3.10.0" 337 | }, 338 | "orig_nbformat": 4 339 | }, 340 | "nbformat": 4, 341 | "nbformat_minor": 2 342 | } 343 | -------------------------------------------------------------------------------- 
/examples/files/facts.txt: -------------------------------------------------------------------------------- 1 | # 🤖 Megabots 2 | 3 | author: Megaklis Vasilakis 4 | author_email: megaklis.vasilakis@gmail.com 5 | repo: https://github.com/momegas/megabots 6 | 7 | 🤖 Megabots provides State-of-the-art, production ready LLM apps made mega-easy, so you don't have to build them from scratch 🤯 Create a bot, now 🫵 8 | 9 | - 👉 Join us on Discord: https://discord.gg/zkqDWk5S7P 10 | - ✈️ Work is managed in this project: https://github.com/users/momegas/projects/5/views/2 11 | 12 | **The Megabots library can be used to create bots that:** 13 | 14 | - ⌚️ are production ready, in minutes 15 | - 🗂️ can answer questions over documents 16 | - 💾 can connect to vector databases 17 | - 🎖️ automatically expose the bot as a rebust API using FastAPI (early release) 18 | - 🏓 automatically expose the bot as a UI using Gradio 19 | 20 | **Coming soon:** 21 | 22 | - 🗣️ accept voice as an input using [whisper](https://github.com/openai/whisper) 23 | - 👍 validate and correct the outputs of LLMs using [guardrails](https://github.com/ShreyaR/guardrails) 24 | - 💰 semanticly cache LLM Queries and reduce Costs by 10x using [GPTCache](https://github.com/zilliztech/GPTCache) 25 | - 🏋️ mega-easy LLM training 26 | - 🚀 mega-easy deployment 27 | 28 | 🤖 Megabots is backed by some of the most famous tools for productionalising AI. It uses [LangChain](https://docs.langchain.com/docs/) for managing LLM chains, [FastAPI](https://fastapi.tiangolo.com/) to create a production ready API, [Gradio](https://gradio.app/) to create a UI. At the moment it uses [OpenAI](https://openai.com/) to generate answers, but we plan to support other LLMs in the future. 29 | 30 | ## Getting started 31 | 32 | Note: This is a work in progress. The API might change. 
33 | 34 | ```bash 35 | pip install megabots 36 | ``` 37 | 38 | ```python 39 | from megabots import bot 40 | import os 41 | 42 | os.environ["OPENAI_API_KEY"] = "my key" 43 | 44 | # Create a bot 👉 with one line of code. Automatically loads your data from ./index or index.pkl. 45 | # Keep in mind that you need to have one or another. 46 | qnabot = bot("qna-over-docs") 47 | 48 | # Ask a question 49 | answer = bot.ask("How do I use this bot?") 50 | 51 | # Save the index to save costs (GPT is used to create the index) 52 | bot.save_index("index.pkl") 53 | 54 | # Load the index from a previous run 55 | qnabot = bot("qna-over-docs", index="./index.pkl") 56 | 57 | # Or create the index from a directory of documents 58 | qnabot = bot("qna-over-docs", index="./index") 59 | 60 | # Change the model 61 | qnabot = bot("qna-over-docs", model="text-davinci-003") 62 | ``` 63 | 64 | ## Changing the bot's prompt 65 | 66 | You can change the bots promnpt to customize it to your needs. In the `qna-over-docs` type of bot you will need to pass 2 variables for the `context` (knwoledge searched from the index) and the `question` (the human question). 67 | 68 | ```python 69 | from megabots import bot 70 | 71 | prompt = """ 72 | Use the following pieces of context to answer the question at the end. 73 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 74 | Answer in the style of Tony Stark. 75 | 76 | {context} 77 | 78 | Question: {question} 79 | Helpful humorous answer:""" 80 | 81 | qnabot = bot("qna-over-docs", index="./index.pkl", prompt=prompt) 82 | 83 | qnabot.ask("what was the first roster of the avengers?") 84 | ``` 85 | 86 | ## Working with memory 87 | 88 | You can easily add memory to your `bot` using the `memory` parameter. It accepts a string with the type of the memory to be used. This defaults to some sane dafaults. 
89 | Should you need more configuration, you can use the `memory` function and pass the type of memory and the configuration you need. 90 | 91 | ```python 92 | from megabots import bot 93 | 94 | qnabot = bot("qna-over-docs", index="./index.pkl", memory="conversation-buffer") 95 | 96 | print(qnabot.ask("who is iron man?")) 97 | print(qnabot.ask("was he in the first roster?")) 98 | # Bot should understand who "he" refers to. 99 | ``` 100 | 101 | Or using the `memory`factory function 102 | 103 | ```python 104 | from megabots import bot, memory 105 | 106 | mem("conversation-buffer-window", k=5) 107 | 108 | qnabot = bot("qna-over-docs", index="./index.pkl", memory=mem) 109 | 110 | print(qnabot.ask("who is iron man?")) 111 | print(qnabot.ask("was he in the first roster?")) 112 | ``` 113 | 114 | NOTE: For the `qna-over-docs` bot, when using memory and passing your custom prompt, it is important to remember to pass one more variable to your custom prompt to facilitate for chat history. The variable name is `history`. 115 | 116 | ```python 117 | from megabots import bot 118 | 119 | prompt = """ 120 | Use the following pieces of context to answer the question at the end. 121 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 122 | 123 | {context} 124 | 125 | {history} 126 | Human: {question} 127 | AI:""" 128 | 129 | qnabot = bot("qna-over-docs", prompt=prompt, index="./index.pkl", memory="conversation-buffer") 130 | 131 | print(qnabot.ask("who is iron man?")) 132 | print(qnabot.ask("was he in the first roster?")) 133 | ``` 134 | 135 | ## Using Megabots with Milvus (more DBs comming soon) 136 | 137 | Megabots `bot` can also use Milvus as a backend for its search engine. You can find an example of how to do it below. 138 | 139 | In order to run Milvus you need to follow [this guide](https://milvus.io/docs/example_code.md) to download a docker compose file and run it. 
140 | The command is: 141 | 142 | ```bash 143 | wget https://raw.githubusercontent.com/milvus-io/pymilvus/v2.2.7/examples/hello_milvus.py 144 | ``` 145 | 146 | You can then [install Attu](https://milvus.io/docs/attu_install-docker.md) as a management tool for Milvus 147 | 148 | ```python 149 | from megabots import bot 150 | 151 | # Attach a vectorstore by passing the name of the database. Default port for milvus is 19530 and default host is localhost 152 | # Point it to your files directory so that it can index the files and add them to the vectorstore 153 | bot = bot("qna-over-docs", index="./examples/files/", vectorstore="milvus") 154 | 155 | bot.ask("what was the first roster of the avengers?") 156 | ``` 157 | 158 | Or use the `vectorstore` factory function for more customisation 159 | 160 | ```python 161 | 162 | from megabots import bot, vectorstore 163 | 164 | milvus = vectorstore("milvus", host="localhost", port=19530) 165 | 166 | bot = bot("qna-over-docs", index="./examples/files/", vectorstore=milvus) 167 | ``` 168 | 169 | ## Exposing an API with FastAPI 170 | 171 | You can also create a FastAPI app that will expose the bot as an API using the create_app function. 172 | Assuming you file is called `main.py` run `uvicorn main:app --reload` to run the API locally. 173 | You should then be able to visit `http://localhost:8000/docs` to see the API documentation. 174 | 175 | ```python 176 | from megabots import bot, create_api 177 | 178 | app = create_app(bot("qna-over-docs")) 179 | ``` 180 | 181 | ## Exposing a Gradio chat-like interface 182 | 183 | You can expose a gradio UI for the bot using `create_interface` function. 184 | Assuming your file is called `ui.py` run `gradio qnabot/ui.py` to run the UI locally. 185 | You should then be able to visit `http://127.0.0.1:7860` to see the API documentation. 
186 | 187 | ```python 188 | from megabots import bot, create_interface 189 | 190 | demo = create_interface(bot("qna-over-docs")) 191 | ``` 192 | 193 | ## Customising bot 194 | 195 | The `bot` function should serve as the starting point for creating and customising your bot. Below is a list of the available arguments in `bot`. 196 | 197 | | Argument | Description | 198 | | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 199 | | task | The type of bot to create. Available options: `qna-over-docs`. More comming soon | 200 | | index | Specifies the index to use for the bot. It can either be a saved index file (e.g., `index.pkl`) or a directory of documents (e.g., `./index`). In the case of the directory the index will be automatically created. If no index is specified `bot` will look for `index.pkl` or `./index` | 201 | | model | The name of the model to use for the bot. You can specify a different model by providing its name, like "text-davinci-003". Supported models: `gpt-3.5-turbo` (default),`text-davinci-003` More comming soon. | 202 | | prompt | A string template for the prompt, which defines the format of the question and context passed to the model. The template should include placeholder variables like so: `context`, `{question}` and in the case of using memory `history`. | 203 | | memory | The type of memory to be used by the bot. Can be a string with the type of the memory or you can use `memory` factory function. Supported memories: `conversation-buffer`, `conversation-buffer-window` | 204 | | vectorstore | The vectorstore to be used for the index. Can be a string with the name of the databse or you can use `vectorstore` factory function. Supported DBs: `milvus`. 
| 205 | 206 | | sources | When `sources` is `True` the bot will also include sources in the response. A known [issue](https://github.com/hwchase17/langchain/issues/2858) exists, where if you pass a custom prompt with sources the code breaks. | 207 | 208 | ## How QnA bot works 209 | 210 | Large language models (LLMs) are powerful, but they can't answer questions about documents they haven't seen. If you want to use an LLM to answer questions about documents it was not trained on, you have to give it information about those documents. To solve this, we use "retrieval augmented generation." 211 | 212 | In simple terms, when you have a question, you first search for relevant documents. Then, you give the documents and the question to the language model to generate an answer. To make this work, you need your documents in a searchable format (an index). This process involves two main steps: (1) preparing your documents for easy querying, and (2) using the retrieval augmented generation method. 213 | 214 | `qna-over-docs` uses FAISS to create an index of documents and GPT to generate answers. 215 | 216 | ## How to contribute? 217 | 218 | We welcome any suggestions, problem reports, and contributions! 219 | For any changes you would like to make to this project, we invite you to submit an [issue](https://github.com/momegas/megabots/issues). 220 | 221 | For more information, see [`CONTRIBUTING`](https://github.com/momegas/megabots/blob/main/CONTRIBUTING.md) instructions. 
222 | -------------------------------------------------------------------------------- /megabots/__init__.py: -------------------------------------------------------------------------------- 1 | from megabots.vectorstore import VectorStore, vectorstore 2 | from megabots.memory import Memory, memory 3 | from megabots.bot import Bot, bot 4 | from megabots.prompt import prompt 5 | from megabots.utils import create_api, create_interface 6 | 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | -------------------------------------------------------------------------------- /megabots/api.py: -------------------------------------------------------------------------------- 1 | import os 2 | from megabots import bot 3 | from megabots.utils import create_api 4 | 5 | # from lcserve import serving 6 | 7 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 8 | index_dir = os.path.join(cur_dir, "..", "examples", "files") 9 | 10 | 11 | mybot = bot("qna-over-docs", index="./index.pkl") 12 | 13 | 14 | # @serving 15 | # def ask(question: str) -> str: 16 | # return mybot.ask(question) 17 | 18 | 19 | app = create_api(mybot) 20 | -------------------------------------------------------------------------------- /megabots/bot.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from langchain.llms import OpenAI 3 | from langchain.chat_models import ChatOpenAI 4 | from langchain.embeddings import OpenAIEmbeddings 5 | from langchain.chains.qa_with_sources import load_qa_with_sources_chain 6 | from langchain.vectorstores.faiss import FAISS 7 | import pickle 8 | import os 9 | from langchain.prompts import PromptTemplate 10 | from langchain.chains.question_answering import load_qa_chain 11 | from langchain.chains.conversational_retrieval.prompts import QA_PROMPT 12 | from langchain.document_loaders import DirectoryLoader 13 | from megabots.prompt import QA_MEMORY_PROMPT 14 | from megabots.vectorstore import 
VectorStore 15 | from megabots.memory import Memory 16 | import megabots 17 | 18 | 19 | class Bot: 20 | def __init__( 21 | self, 22 | model: str | None = None, 23 | prompt: PromptTemplate | None = None, 24 | index: str | None = None, 25 | sources: bool | None = False, 26 | vectorstore: VectorStore | None = None, 27 | memory: Memory | None = None, 28 | verbose: bool = False, 29 | temperature: int = 0, 30 | ): 31 | self.vectorstore = vectorstore 32 | self.memory = memory 33 | self.prompt = prompt or QA_MEMORY_PROMPT if self.memory else QA_PROMPT 34 | self.select_model(model, temperature) 35 | self.create_loader(index) 36 | self.load_or_create_index(index, vectorstore) 37 | 38 | # Load the question-answering chain for the selected model 39 | self.chain = self.create_chain(sources=sources, verbose=verbose) 40 | 41 | def create_chain( 42 | self, 43 | sources: bool | None = False, 44 | verbose: bool = False, 45 | ): 46 | # TODO: Changing the prompt here is not working. Leave it as is for now. 
47 | # Reference: https://github.com/hwchase17/langchain/issues/2858 48 | if sources: 49 | return load_qa_with_sources_chain( 50 | self.llm, 51 | chain_type="stuff", 52 | memory=self.memory.memory if self.memory else None, 53 | verbose=verbose, 54 | ) 55 | return load_qa_chain( 56 | self.llm, 57 | chain_type="stuff", 58 | verbose=verbose, 59 | prompt=self.prompt, 60 | memory=self.memory.memory if self.memory else None, 61 | ) 62 | 63 | def select_model(self, model: str | None, temperature: float): 64 | # Select and set the appropriate model based on the provided input 65 | if model is None or model == "gpt-3.5-turbo": 66 | print("Using model: gpt-3.5-turbo") 67 | self.llm = ChatOpenAI(temperature=temperature) 68 | 69 | if model == "text-davinci-003": 70 | print("Using model: text-davinci-003") 71 | self.llm = OpenAI(temperature=temperature) 72 | 73 | def create_loader(self, index: str | None): 74 | # Create a loader based on the provided directory (either local or S3) 75 | if index is None: 76 | raise RuntimeError( 77 | """ 78 | Impossible to find a valid index. 79 | Either provide a valid path to a pickle file or a directory. 
80 | """ 81 | ) 82 | self.loader = DirectoryLoader(index, recursive=True) 83 | 84 | def load_or_create_index(self, index: str, vectorstore: VectorStore | None = None): 85 | # Load an existing index from disk or create a new one if not available 86 | if vectorstore is not None: 87 | self.search_index = vectorstore.client.from_documents( 88 | self.loader.load_and_split(), 89 | OpenAIEmbeddings(), 90 | connection_args={"host": vectorstore.host, "port": vectorstore.port}, 91 | ) 92 | return 93 | 94 | # Is pickle 95 | if index is not None and "pkl" in index or "pickle" in index: 96 | print("Loading path from pickle file: ", index, "...") 97 | with open(index, "rb") as f: 98 | self.search_index = pickle.load(f) 99 | return 100 | 101 | # Is directory 102 | if index is not None and os.path.isdir(index): 103 | print("Creating index...") 104 | self.search_index = FAISS.from_documents( 105 | self.loader.load_and_split(), OpenAIEmbeddings() 106 | ) 107 | return 108 | 109 | raise RuntimeError( 110 | """ 111 | Impossible to find a valid index. 112 | Either provide a valid path to a pickle file or a directory. 
113 | """ 114 | ) 115 | 116 | def save_index(self, index_path: str): 117 | # Save the index to the specified path 118 | with open(index_path, "wb") as f: 119 | pickle.dump(self.search_index, f) 120 | 121 | def ask(self, question: str, k=1) -> str: 122 | # Retrieve the answer to the given question and return it 123 | input_documents = self.search_index.similarity_search(question, k=k) 124 | answer = self.chain.run(input_documents=input_documents, question=question) 125 | return answer 126 | 127 | 128 | SUPPORTED_TASKS = { 129 | "qna-over-docs": { 130 | "impl": Bot, 131 | "default": { 132 | "model": "gpt-3.5-turbo", 133 | "temperature": 0, 134 | "index": "./index", 135 | "input_variables": ["context", "question"], 136 | }, 137 | } 138 | } 139 | 140 | SUPPORTED_MODELS = {} 141 | 142 | 143 | def bot( 144 | task: str | None = None, 145 | *, 146 | model: str | None = None, 147 | index: str | None = None, 148 | prompt: str | None = None, 149 | memory: str | Memory | None = None, 150 | vectorstore: str | VectorStore | None = None, 151 | verbose: bool = False, 152 | temperature: int = 0, 153 | ) -> Bot: 154 | """Instanciate a bot based on the provided task. Each supported tasks has it's own default sane defaults. 155 | 156 | Args: 157 | task (str | None, optional): The given task. Can be one of the SUPPORTED_TASKS. 158 | 159 | model (str | None, optional): Model to be used. Can be one of the SUPPORTED_MODELS. 160 | 161 | index (str | None, optional): Data that the model will load and store index info. 162 | Can be either a local file path, a pickle file, or a url of a vector database. 163 | By default it will look for a local directory called "files" in the current working directory. 164 | 165 | prompt (str | None, optional): The prompt that the bot will take in. Mark variables like this: {variable}. 166 | Variables are context, question, and history if the bot has memory. 
167 | 168 | vectorstore: (str | VectorStore | None, optional): The vectorstore that the bot will save the index to. 169 | If only a string is passed, the defaults values willl be used. 170 | 171 | verbose (bool, optional): Verbocity. Defaults to False. 172 | 173 | temperature (int, optional): Temperature. Defaults to 0. 174 | 175 | Raises: 176 | RuntimeError: _description_ 177 | ValueError: _description_ 178 | 179 | Returns: 180 | Bot: Bot instance 181 | """ 182 | 183 | if task is None: 184 | raise RuntimeError("Impossible to instantiate a bot without a task.") 185 | if task not in SUPPORTED_TASKS: 186 | raise ValueError(f"Task {task} is not supported.") 187 | 188 | task_defaults = SUPPORTED_TASKS[task]["default"] 189 | 190 | if memory is not None: 191 | task_defaults["input_variables"].append("history") 192 | 193 | return SUPPORTED_TASKS[task]["impl"]( 194 | model=model or task_defaults["model"], 195 | index=index or task_defaults["index"], 196 | prompt=None 197 | if prompt is None 198 | else PromptTemplate( 199 | template=prompt, input_variables=task_defaults["input_variables"] 200 | ), 201 | temperature=temperature, 202 | verbose=verbose, 203 | vectorstore=megabots.vectorstore(vectorstore) 204 | if isinstance(vectorstore, str) 205 | else vectorstore, 206 | memory=megabots.memory(memory) if isinstance(memory, str) else memory, 207 | ) 208 | -------------------------------------------------------------------------------- /megabots/memory.py: -------------------------------------------------------------------------------- 1 | from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory 2 | 3 | 4 | class ConversationBuffer: 5 | def __init__(self): 6 | self.memory = ConversationBufferMemory(input_key="question") 7 | 8 | 9 | class ConversationBufferWindow: 10 | def __init__(self, k: int): 11 | self.k: int = k 12 | self.memory = ConversationBufferWindowMemory(k=self.k, input_key="question") 13 | 14 | 15 | SUPPORTED_MEMORY = { 16 | 
"conversation-buffer": { 17 | "impl": ConversationBuffer, 18 | "default": {}, 19 | }, 20 | "conversation-buffer-window": { 21 | "impl": ConversationBufferWindow, 22 | "default": {"k": 3}, 23 | }, 24 | } 25 | 26 | 27 | Memory = type("Memory", (ConversationBuffer, ConversationBufferWindow), {}) 28 | 29 | 30 | def memory( 31 | name: str = "conversation-buffer-window", 32 | k: int | None = None, 33 | ) -> Memory: 34 | if name is None: 35 | raise RuntimeError("Impossible to instantiate memory without a name.") 36 | 37 | if name not in SUPPORTED_MEMORY: 38 | raise ValueError(f"Memory {name} is not supported.") 39 | 40 | cl = SUPPORTED_MEMORY[name]["impl"] 41 | 42 | if name == "conversation-buffer-window": 43 | return cl(k=k or SUPPORTED_MEMORY[name]["default"]["k"]) 44 | 45 | return SUPPORTED_MEMORY[name]["impl"]() 46 | -------------------------------------------------------------------------------- /megabots/prompt.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from langchain import PromptTemplate 3 | 4 | QNA_TEMPLATE = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 
5 | 6 | {context} 7 | 8 | {history} 9 | Human: {question} 10 | AI:""" 11 | 12 | QA_MEMORY_PROMPT = PromptTemplate( 13 | template=QNA_TEMPLATE, input_variables=["context", "history", "question"] 14 | ) 15 | 16 | 17 | def prompt(template: str, variables: List[str]) -> PromptTemplate: 17 | """Build a PromptTemplate from a template string and its input variable names.""" 18 | return PromptTemplate(template=template, input_variables=variables) 19 | -------------------------------------------------------------------------------- /megabots/utils.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from fastapi import FastAPI 3 | from megabots.bot import Bot 4 | from fastapi.openapi.utils import get_openapi 5 | from pydantic import BaseModel 6 | 7 | 8 | def _custom_openapi(app: FastAPI, version: str): 8 | """Return the app's cached OpenAPI schema if present, otherwise generate one via get_openapi.""" 9 | if app.openapi_schema: 10 | return app.openapi_schema 11 | 12 | openapi_schema = get_openapi( 13 | title="🤖 Megabots API", 14 | version=version, 15 | description="Use this API to interact with the bot.", 16 | routes=app.routes, 17 | ) 18 | return openapi_schema 19 | 20 | 21 | class Answer(BaseModel): 22 | text: str 23 | 24 | 25 | def create_api(bot: Bot, version: str = "0.0.1"): 25 | """Create a FastAPI app exposing the bot's ask() at GET /v1/bot/ask/{question}.""" 26 | app = FastAPI() 27 | 28 | @app.get( 29 | "/v1/bot/ask/{question}", 30 | tags=["Bot"], 31 | summary="Ask bot", 32 | description="Send question to the bot.", 33 | responses={200: {"description": "Bot answer"}}, 34 | response_model=Answer, 35 | ) 36 | async def ask(question: str) -> Answer: 37 | answer = bot.ask(question) 38 | return Answer(text=answer) 39 | 40 | app.openapi_schema = _custom_openapi(app, version) 41 | 42 | return app 43 | 44 | 45 | def create_interface(bot_instance: Bot, markdown: str = ""): 45 | """Build a Gradio Blocks chat interface wired to bot_instance.ask.""" 46 | with gr.Blocks() as interface: 47 | gr.Markdown(markdown) 48 | chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450) 49 | msg = gr.Textbox( 50 | show_label=False, 51 | placeholder="Enter text and press enter", 52 | ).style(container=False) 53 | 54 | # Clear the textbox and append the user's message (with no reply yet) to the history. 54 | def user(user_message, history): 55 | return "", history + [[user_message, 
None]] 56 | 57 | def bot(history): 58 | print("im here") 59 | response = bot_instance.ask(history[-1][0]) 60 | history[-1][1] = response 61 | return history 62 | 63 | msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then( 64 | bot, chatbot, chatbot 65 | ) 66 | 67 | return interface 68 | -------------------------------------------------------------------------------- /megabots/vectorstore.py: -------------------------------------------------------------------------------- 1 | from typing import Type, TypeVar 2 | from langchain.vectorstores import Milvus 3 | from abc import ABC 4 | 5 | 6 | class MilvusVectorStore: 7 | def __init__(self, host: str, port: int): 8 | self.host = host 9 | self.port = port 10 | self.client = Milvus 11 | 12 | 13 | class ChromaVectorStore: 14 | pass 15 | 16 | 17 | # Generic type variable for all vectorstores 18 | VectorStore = type("VectorStore", (MilvusVectorStore, ChromaVectorStore), {}) 19 | 20 | 21 | SUPPORTED_VECTORSTORES = { 22 | "milvus": { 23 | "impl": MilvusVectorStore, 24 | "default": {"host": "localhost", "port": 19530}, 25 | } 26 | } 27 | 28 | 29 | def vectorstore( 30 | name: str, host: str | None = None, port: int | None = None 31 | ) -> VectorStore: 32 | """Return a vectorstore object.""" 33 | 34 | if name is None: 35 | raise RuntimeError("Impossible to instantiate a vectorstore without a name.") 36 | 37 | if name not in SUPPORTED_VECTORSTORES: 38 | raise ValueError(f"Vectorstore {name} is not supported.") 39 | 40 | return SUPPORTED_VECTORSTORES[name]["impl"]( 41 | host=host or SUPPORTED_VECTORSTORES[name]["default"]["host"], 42 | port=port or SUPPORTED_VECTORSTORES[name]["default"]["port"], 43 | ) 44 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu>=1.7 2 | fastapi>=0.95 3 | gradio==3.26.0 4 | gradio_client>=0.1 5 | httpx==0.23 6 | langchain==0.0.137 7 | openai>=0.27 8 
| pytest>=7.3 9 | rich 10 | unstructured>=0.5 11 | python-dotenv==1.0.0 12 | pdfminer.six 13 | tiktoken==0.3.3 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | VERSION = "0.0.11" 4 | 5 | setup( 6 | name="megabots", 7 | version=VERSION, 8 | packages=find_packages(), 9 | install_requires=[ 10 | "langchain", 11 | "tiktoken", 12 | "unstructured", 13 | "fastapi", 14 | "faiss-cpu", 15 | "pdfminer.six", 16 | "gradio", 17 | "python-dotenv", 18 | "openai", 19 | "langchain-serve", 20 | ], 21 | author="Megaklis Vasilakis", 22 | author_email="megaklis.vasilakis@gmail.com", 23 | description="🤖 Megabots provides State-of-the-art, production ready bots made mega-easy, so you don't have to build them from scratch 🤯 Create a bot, now 🫵", 24 | long_description=open("README.md").read(), 25 | long_description_content_type="text/markdown", 26 | url="https://github.com/momegas/megabots", 27 | keywords=[ 28 | "bot", 29 | "qna-bot", 30 | "information-retrieval", 31 | "chatbot", 32 | "question-answering", 33 | "prompt-engineering", 34 | ], 35 | license="MIT", 36 | classifiers=[ 37 | # Choose appropriate classifiers from 38 | # https://pypi.org/classifiers/ 39 | "Development Status :: 4 - Beta" 40 | ], 41 | ) 42 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/momegas/megabots/55914acc023069a5a9e921e96aeb99d1c9a97f18/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | # def _ignore_warnings(): 2 | # import logging 3 | # import warnings 4 | 5 | # logging.captureWarnings(True) 6 | # 
warnings.filterwarnings( 7 | # "ignore", 8 | # category=DeprecationWarning, 9 | # message="Deprecated call to `pkg_resources.declare_namespace('google')`.", 10 | # ) 11 | 12 | 13 | # _ignore_warnings() 14 | 15 | # import os 16 | # import signal 17 | # import subprocess 18 | 19 | # import requests 20 | # from requests.adapters import HTTPAdapter, Retry 21 | 22 | # megabot_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 23 | 24 | 25 | # def _session_with_retry() -> requests.Session: 26 | # s = requests.Session() 27 | # retries = Retry( 28 | # total=50, backoff_factor=1, status_forcelist=[404, 500, 502, 503, 504] 29 | # ) 30 | # s.mount("http://", HTTPAdapter(max_retries=retries)) 31 | # return s 32 | 33 | 34 | # class LCServeLocally: 35 | # def __init__(self, port: int = 8000): 36 | # self.port = port 37 | # self.command = " ".join( 38 | # [ 39 | # "lc-serve", 40 | # "deploy", 41 | # "local", 42 | # "megabots.api", 43 | # "--port", 44 | # str(self.port), 45 | # ] 46 | # ) 47 | 48 | # def __enter__(self): 49 | # self.p = subprocess.Popen( 50 | # self.command, cwd=megabot_dir, shell=True, preexec_fn=os.setsid 51 | # ) 52 | 53 | # def __exit__(self, exc_type, exc_val, exc_tb): 54 | # self.p.terminate() 55 | # os.killpg(os.getpgid(self.p.pid), signal.SIGTERM) 56 | 57 | 58 | # def test_lcserve_successful(): 59 | # port = 8000 60 | # lcserve_host = f"http://localhost:{port}" 61 | 62 | # with LCServeLocally(port=port): 63 | # resp = _session_with_retry().post( 64 | # url=f"{lcserve_host}/ask", 65 | # json={"question": "What is your name?"}, 66 | # ) 67 | # assert resp.status_code == 200 68 | # assert "result" in resp.json() 69 | # assert isinstance(resp.json()["result"], str) 70 | 71 | 72 | # def test_lcserve_invalid_request(): 73 | # port = 8000 74 | # lcserve_host = f"http://localhost:{port}" 75 | 76 | # with LCServeLocally(port=port): 77 | # resp = _session_with_retry().post( 78 | # url=f"{lcserve_host}/ask", 79 | # json={"foo": "bar"}, 80 | # ) 81 | # 
assert resp.status_code == 422 82 | # assert "detail" in resp.json() 83 | # assert resp.json()["detail"] == [ 84 | # { 85 | # "loc": ["body", "question"], 86 | # "msg": "field required", 87 | # "type": "value_error.missing", 88 | # } 89 | # ] 90 | -------------------------------------------------------------------------------- /tests/test_bots.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from megabots import bot 4 | import pickle 5 | from langchain.vectorstores.faiss import FAISS 6 | 7 | 8 | # Define test data 9 | test_directory = "./examples/files" 10 | test_question = "what is megabots?" 11 | correct_answer = "state-of-the-art, production" 12 | sources = "SOURCES:" 13 | 14 | 15 | def test_ask(): 16 | qnabot = bot("qna-over-docs", index=test_directory) 17 | answer = qnabot.ask(test_question) 18 | 19 | print(answer) 20 | 21 | # Assert that the answer contains the correct answer 22 | assert correct_answer in answer 23 | # Assert that the answer does not contain the sources 24 | assert sources not in answer 25 | 26 | 27 | def test_save_load_index(): 28 | # Create a temporary directory and file path for the test index 29 | with tempfile.TemporaryDirectory() as temp_dir: 30 | index_path = os.path.join(temp_dir, "test_index.pkl") 31 | 32 | # Create a bot and save the index to the temporary file path 33 | qnabot = bot("qna-over-docs", index=test_directory) 34 | qnabot.save_index(index_path) 35 | 36 | # Load the saved index and assert that it is the same as the original index 37 | with open(index_path, "rb") as f: 38 | saved_index = pickle.load(f) 39 | assert isinstance(saved_index, FAISS) 40 | 41 | bot_with_predefined_index = bot("qna-over-docs", index=index_path) 42 | 43 | # Assert that the bot returns the correct answer to the test question 44 | assert correct_answer in bot_with_predefined_index.ask(test_question) 45 | -------------------------------------------------------------------------------- 
/tests/test_memory.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | from megabots import memory 3 | from megabots.memory import ConversationBuffer, ConversationBufferWindow 4 | 5 | 6 | def test_memory_conversation_buffer(): 7 | mem = memory(name="conversation-buffer") 8 | assert isinstance(mem, ConversationBuffer) 9 | 10 | 11 | def test_memory_conversation_buffer_window(): 12 | mem = memory(name="conversation-buffer-window", k=10) 13 | assert isinstance(mem, ConversationBufferWindow) 14 | 15 | 16 | def test_memory_unsupported_name(): 17 | with raises(ValueError, match=r"Memory invalid-name is not supported."): 18 | memory(name="invalid-name") 19 | 20 | 21 | def test_memory_no_name(): 22 | with raises( 23 | RuntimeError, match=r"Impossible to instantiate memory without a name." 24 | ): 25 | memory(name=None) 26 | -------------------------------------------------------------------------------- /tests/test_ui.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from megabots import create_interface 3 | 4 | 5 | def test_create_interface(): 6 | # create a mock Bot object 7 | class MockBot: 8 | def ask(self, question: str): 9 | return "Answer" 10 | 11 | markdown = "test" 12 | 13 | # call the function with the mock bot and markdown 14 | interface = create_interface(MockBot(), markdown=markdown) 15 | 16 | # check if the interface has the correct properties 17 | assert isinstance(interface, gr.Blocks) 18 | --------------------------------------------------------------------------------