├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ ├── feature_request.yml │ └── improve_existing_docs.yml └── workflows │ ├── python-package.yml │ └── release.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENCE ├── Makefile ├── README.md ├── SECURITY.md ├── app.py ├── example.ipynb ├── examples └── files │ └── facts.txt ├── megabots ├── __init__.py ├── api.py ├── bot.py ├── memory.py ├── prompt.py ├── utils.py └── vectorstore.py ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── test_api.py ├── test_bots.py ├── test_memory.py └── test_ui.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Ignore Jupyter notebooks from Git stats 2 | *.ipynb linguist-documentation -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report to help us improve 3 | labels: ['bug', 'triage'] 4 | assignees: 5 | - momegas 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: '## :beetle: Bug Report :beetle:' 10 | - type: input 11 | id: title 12 | attributes: 13 | label: 'Title' 14 | description: 'Provide a brief, clear title for the bug report' 15 | placeholder: 'Example: Unexpected behavior when clicking "Submit"' 16 | validations: 17 | required: true 18 | - type: textarea 19 | id: description 20 | attributes: 21 | label: 'Description' 22 | description: 'Please provide a clear and concise description of the bug' 23 | placeholder: 'When I click "Submit", the page refreshes instead of showing a confirmation message' 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: steps 28 | attributes: 29 | label: 'Steps to Reproduce' 30 | description: 'Provide the steps to reproduce the bug' 31 | placeholder: | 32 | 1. Go to '...' 33 | 2. Click on '....' 
34 | 3. Scroll down to '....' 35 | 4. See the error 36 | validations: 37 | required: true 38 | - type: textarea 39 | id: expected 40 | attributes: 41 | label: 'Expected Behavior' 42 | description: 'Please describe the expected behavior' 43 | placeholder: 'After clicking "Submit", a confirmation message should appear' 44 | validations: 45 | required: true 46 | - type: textarea 47 | id: actual 48 | attributes: 49 | label: 'Actual Behavior' 50 | description: 'Please describe the actual behavior that you experienced' 51 | placeholder: 'The page refreshes without showing a confirmation message' 52 | validations: 53 | required: true 54 | - type: textarea 55 | id: additional-context 56 | attributes: 57 | label: 'Additional Context' 58 | description: 'Provide any additional context or information that may help diagnose the issue (e.g., browser version, operating system, etc.)' 59 | placeholder: 'The issue occurs in Chrome 96.0.4664.93 on macOS 11.6.1' 60 | validations: 61 | required: false 62 | - type: checkboxes 63 | id: python-version 64 | attributes: 65 | label: 'Python Version' 66 | description: 'Select the Python version(s) affected by this bug' 67 | options: 68 | - label: 'Python <= 3.9' 69 | - label: 'Python 3.10' 70 | - label: 'Python 3.11' 71 | validations: 72 | required: true 73 | - type: checkboxes 74 | id: acknowledgements 75 | attributes: 76 | label: 'Acknowledgements' 77 | description: 'Please confirm the following:' 78 | options: 79 | - label: 'I have searched the existing issues to make sure this bug has not been reported yet' 80 | required: true 81 | - label: 'I am using the latest version of the software' 82 | required: true 83 | - label: 'I have provided enough information for the maintainers to reproduce and diagnose the issue' 84 | required: true 85 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | 
blank_issues_enabled: false 2 | contact_links: 3 | - name: Ask a question 4 | url: https://github.com/momegas/megabots/discussions/categories/q-a 5 | about: Ask questions and discuss with other community members 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Suggest a new feature or enhancement for this project 3 | labels: ['enhancement'] 4 | assignees: [momegas] 5 | 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: '## :sparkles: Feature Request :sparkles:' 10 | - type: input 11 | id: title 12 | attributes: 13 | label: Title 14 | description: 'Provide a brief, clear title for the feature request' 15 | placeholder: 'Example: Add a "Dark Mode" theme option' 16 | validations: 17 | required: true 18 | - type: textarea 19 | id: problem 20 | attributes: 21 | label: Problem Description 22 | description: 'Describe the problem you are trying to solve or the limitation you are facing' 23 | placeholder: 'I often work at night and find the bright interface hard on my eyes / if there is an open issue please link it with #number' 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: solution 28 | attributes: 29 | label: Proposed Solution 30 | description: 'Describe the solution you would like to see implemented' 31 | placeholder: 'Add a "Dark Mode" theme option in the settings that applies a dark color scheme to the interface' 32 | validations: 33 | required: true 34 | - type: textarea 35 | id: alternatives 36 | attributes: 37 | label: Alternatives Considered 38 | description: 'List any alternative solutions or features you have considered' 39 | placeholder: 'I have tried using browser extensions to apply a dark theme, but they do not work well with this application' 40 | validations: 41 | required: false 42 | - type: textarea 43 | id: 
additional-context 44 | attributes: 45 | label: Additional Context 46 | description: 'Provide any additional context or information about the feature request (e.g., screenshots, mockups, etc.)' 47 | placeholder: 'Here is a link to a sample "Dark Mode" color scheme I found online: https://example.com/dark-theme' 48 | validations: 49 | required: false 50 | - type: checkboxes 51 | id: acknowledgements 52 | attributes: 53 | label: Acknowledgements 54 | description: 'Please confirm the following:' 55 | options: 56 | - label: 'I have searched the existing issues to make sure this feature has not been requested yet' 57 | required: true 58 | - label: 'I have provided enough information for the maintainers to understand and evaluate the feature request' 59 | required: true 60 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/improve_existing_docs.yml: -------------------------------------------------------------------------------- 1 | name: Improve existing content 2 | description: Make a suggestion to improve the content in an existing article. 3 | labels: 4 | - content 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | * For questions, ask in [Discussions](https://github.com/momegas/megabots/discussions/categories/q-a). 10 | * Before you file an issue read the [Contributing guide](https://github.com/momegas/megabots/blob/docs/CONTRIBUTING.md). 11 | * Check to make sure someone hasn't already opened a similar [issue](https://github.com/momegas/megabots/issues). 12 | 13 | - type: checkboxes 14 | id: terms 15 | attributes: 16 | label: Code of Conduct 17 | description: This project has a Code of Conduct that all participants are expected to understand and follow. 
18 | options: 19 | - label: I have read and agree to the projects [Code of Conduct](https://github.com/momegas/megabots/blob/main/CODE_OF_CONDUCT.md) 20 | required: true 21 | 22 | - type: textarea 23 | attributes: 24 | label: What article is affected? 25 | description: Please link to the article you'd like to see updated. 26 | validations: 27 | required: true 28 | 29 | - type: textarea 30 | attributes: 31 | label: What part(s) of the article would you like to see updated? 32 | description: | 33 | - Give as much detail as you can to help us understand the change you want to see. 34 | - Why should the docs be changed? What use cases does it support? 35 | - What is the expected outcome? 36 | validations: 37 | required: true 38 | 39 | - type: textarea 40 | attributes: 41 | label: Additional information 42 | description: Add any other context or screenshots about the feature request for the documentation here. 43 | validations: 44 | required: false 45 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | pull_request: 7 | branches: ["main"] 8 | 9 | jobs: 10 | build_and_test: 11 | runs-on: ubuntu-latest 12 | environment: Development 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python-version: ["3.10", "3.11"] 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v3 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 28 | - name: Test with pytest 29 | run: | 30 | export OPENAI_API_KEY=${{secrets.OPENAI_API_KEY}} 31 | pytest ./tests 32 | 
-------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release PyPI Package 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | release_package: 9 | permissions: 10 | contents: write 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v2 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: Set up Python 3.11.3 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: 3.11.3 22 | 23 | - name: Build a binary wheel and a source tarball. 24 | run: pip install wheel && python setup.py sdist bdist_wheel 25 | 26 | - name: Publish distribution 📦 to PyPI 27 | if: startsWith(github.ref, 'refs/tags') 28 | uses: pypa/gh-action-pypi-publish@release/v1 29 | with: 30 | password: ${{ secrets.PYPI_API_TOKEN }} 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # General 2 | __pycache__ 3 | .pytest_cache 4 | **.egg-info 5 | dist 6 | build 7 | **.pickle 8 | **.pkl 9 | volumes 10 | docker-compose.yml 11 | 12 | # Environments 13 | .venv 14 | .env 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Jupyter Notebook checkpoints 42 | .ipynb_checkpoints -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as 
members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 
45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | megaklis.vasilakis@gmail.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. 
No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 
129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | First of all, thank you for your interest in contributing! We appreciate your time and effort, and we value your contributions to make this project better. This document will provide you with the information you need to start contributing. 4 | 5 | ## How to Get Started 6 | 7 | 1. Clone the repository and create a new branch 8 | 2. Make your changes 9 | 3. Submit a pull request 10 | 4. Wait for a review 11 | 5. Tada! You're done! 12 | 13 | ## How to Report a Bug 14 | 15 | If you find a bug, please file an issue on the using the bug report template. 16 | 17 | ## How to Suggest a Feature or Enhancement 18 | 19 | If you have an idea for a new feature or enhancement, please file an issue on the using the feature request template. 20 | 21 | 🙏 Thank you 22 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Megaklis Vasilakis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Define variables 2 | PYTHON=python 3 | PIP=pip 4 | PACKAGE=megabots 5 | 6 | .PHONY: install test clean build publish 7 | 8 | install: 9 | $(PIP) install -r requirements.txt 10 | 11 | test: 12 | $(PYTHON) -m pytest ./tests 13 | 14 | clean: 15 | rm -rf build dist *.egg-info .pytest_cache ./**/__pycache__ 16 | 17 | build: 18 | $(PYTHON) setup.py sdist bdist_wheel 19 | 20 | publish: clean build 21 | $(PYTHON) -m twine upload dist/* 22 | 23 | trace: 24 | langchain-server 25 | 26 | freeze: 27 | $(PIP) freeze > requirements.txt 28 | 29 | gradio: 30 | gradio app.py 31 | 32 | help: 33 | @echo "install - install dependencies" 34 | @echo "test - run tests" 35 | @echo "clean - remove build artifacts" 36 | @echo "build - build package" 37 | @echo "publish - publish package to PyPI" 38 | @echo "help - show this help message" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🤖 Megabots 2 | 3 | [![Tests](https://github.com/momegas/qnabot/actions/workflows/python-package.yml/badge.svg)](https://github.com/momegas/qnabot/actions/workflows/python-package.yml) 4 | [![Python Version](https://img.shields.io/badge/python-%203.10%20-blue.svg)](#supported-python-versions) 5 | [![Code style: 
black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 6 | [![License](https://img.shields.io/badge/License-MIT-informational.svg)](https://github.com/momegas/megabots/blob/main/LICENCE) 7 | ![](https://dcbadge.vercel.app/api/server/zkqDWk5S7P?style=flat&n&compact=true) 8 | 9 | 🤖 Megabots provides State-of-the-art, production ready LLM apps made mega-easy, so you don't have to build them from scratch 🤯 Create a bot, now 🫵 10 | 11 | - 👉 Join us on Discord: https://discord.gg/zkqDWk5S7P 12 | - ✈️ Work is managed in this project: https://github.com/users/momegas/projects/5/views/2 13 | - 🤖 Documentation bot: https://huggingface.co/spaces/momegas/megabots 14 | 15 | **The Megabots library can be used to create bots that:** 16 | 17 | - ⌚️ are production ready, in minutes 18 | - 🗂️ can answer questions over documents 19 | - 💾 can connect to vector databases 20 | - 🎖️ automatically expose the bot as a rebust API using FastAPI (early release) 21 | - 🏓 automatically expose the bot as a UI using Gradio 22 | 23 | 🤖 Megabots is backed by some of the most famous tools for productionalising AI. It uses [LangChain](https://docs.langchain.com/docs/) for managing LLM chains, [langchain-serve](https://github.com/jina-ai/langchain-serve) to create a production ready API, [Gradio](https://gradio.app/) to create a UI. At the moment it uses [OpenAI](https://openai.com/) to generate answers, but we plan to support other LLMs in the future. 24 | 25 | ## Getting started 26 | 27 | Note: This is a work in progress. The API might change. 28 | 29 | ```bash 30 | pip install megabots 31 | ``` 32 | 33 | ```python 34 | from megabots import bot 35 | import os 36 | 37 | os.environ["OPENAI_API_KEY"] = "my key" 38 | 39 | # Create a bot 👉 with one line of code. Automatically loads your data from ./index or index.pkl. 40 | # Keep in mind that you need to have one or another. 
41 | qnabot = bot("qna-over-docs") 42 | 43 | # Ask a question 44 | answer = qnabot.ask("How do I use this bot?") 45 | 46 | # Save the index to save costs (GPT is used to create the index) 47 | qnabot.save_index("index.pkl") 48 | 49 | # Load the index from a previous run 50 | qnabot = bot("qna-over-docs", index="./index.pkl") 51 | 52 | # Or create the index from a directory of documents 53 | qnabot = bot("qna-over-docs", index="./index") 54 | 55 | # Change the model 56 | qnabot = bot("qna-over-docs", model="text-davinci-003") 57 | ``` 58 | 59 | ## Changing the bot's prompt 60 | 61 | You can change the bots promnpt to customize it to your needs. In the `qna-over-docs` type of bot you will need to pass 2 variables for the `context` (knwoledge searched from the index) and the `question` (the human question). 62 | 63 | ```python 64 | from megabots import bot 65 | 66 | prompt = """ 67 | Use the following pieces of context to answer the question at the end. 68 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 69 | Answer in the style of Tony Stark. 70 | 71 | {context} 72 | 73 | Question: {question} 74 | Helpful humorous answer:""" 75 | 76 | qnabot = bot("qna-over-docs", index="./index.pkl", prompt=prompt) 77 | 78 | qnabot.ask("what was the first roster of the avengers?") 79 | ``` 80 | 81 | ## Working with memory 82 | 83 | You can easily add memory to your `bot` using the `memory` parameter. It accepts a string with the type of the memory to be used. This defaults to some sane dafaults. 84 | Should you need more configuration, you can use the `memory` function and pass the type of memory and the configuration you need. 85 | 86 | ```python 87 | from megabots import bot 88 | 89 | qnabot = bot("qna-over-docs", index="./index.pkl", memory="conversation-buffer") 90 | 91 | print(qnabot.ask("who is iron man?")) 92 | print(qnabot.ask("was he in the first roster?")) 93 | # Bot should understand who "he" refers to. 
94 | ``` 95 | 96 | Or using the `memory` factory function 97 | 98 | ```python 99 | from megabots import bot, memory 100 | 101 | mem = memory("conversation-buffer-window", k=5) 102 | 103 | qnabot = bot("qna-over-docs", index="./index.pkl", memory=mem) 104 | 105 | print(qnabot.ask("who is iron man?")) 106 | print(qnabot.ask("was he in the first roster?")) 107 | ``` 108 | 109 | NOTE: For the `qna-over-docs` bot, when using memory and passing your custom prompt, it is important to remember to pass one more variable to your custom prompt to accommodate chat history. The variable name is `history`. 110 | 111 | ```python 112 | from megabots import bot 113 | 114 | prompt = """ 115 | Use the following pieces of context to answer the question at the end. 116 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 117 | 118 | {context} 119 | 120 | {history} 121 | Human: {question} 122 | AI:""" 123 | 124 | qnabot = bot("qna-over-docs", prompt=prompt, index="./index.pkl", memory="conversation-buffer") 125 | 126 | print(qnabot.ask("who is iron man?")) 127 | print(qnabot.ask("was he in the first roster?")) 128 | ``` 129 | 130 | ## Using Megabots with Milvus (more DBs coming soon) 131 | 132 | Megabots `bot` can also use Milvus as a backend for its search engine. You can find an example of how to do it below. 133 | 134 | In order to run Milvus you need to follow [this guide](https://milvus.io/docs/example_code.md) to download a docker compose file and run it. 135 | The command is: 136 | 137 | ```bash 138 | wget https://raw.githubusercontent.com/milvus-io/pymilvus/v2.2.7/examples/hello_milvus.py 139 | ``` 140 | 141 | You can then [install Attu](https://milvus.io/docs/attu_install-docker.md) as a management tool for Milvus 142 | 143 | ```python 144 | from megabots import bot 145 | 146 | # Attach a vectorstore by passing the name of the database. 
Default port for milvus is 19530 and default host is localhost 147 | # Point it to your files directory so that it can index the files and add them to the vectorstore 148 | bot = bot("qna-over-docs", index="./examples/files/", vectorstore="milvus") 149 | 150 | bot.ask("what was the first roster of the avengers?") 151 | ``` 152 | 153 | Or use the `vectorstore` factory function for more customisation 154 | 155 | ```python 156 | 157 | from megabots import bot, vectorstore 158 | 159 | milvus = vectorstore("milvus", host="localhost", port=19530) 160 | 161 | bot = bot("qna-over-docs", index="./examples/files/", vectorstore=milvus) 162 | ``` 163 | 164 | ## Exposing an API with [langchain-serve](https://github.com/jina-ai/langchain-serve) 165 | 166 | You can also expose the bot endpoints locally using langchain-serve. A sample file `api.py` is provided in the `megabots` folder. 167 | 168 | To expose the API locally, you can do 169 | ```bash 170 | lc-serve deploy local megabots.api 171 | ``` 172 | 173 | You should then be able to visit `http://localhost:8000/docs` to see & interact with the API documentation. 174 | 175 | To deploy your API to the cloud, you can do and connect to the API using the endpoint provided in the output. 176 | ```bash 177 | lc-serve deploy jcloud megabots.api 178 | ``` 179 | 180 |
181 | Show command output 182 | 183 | ```text 184 | ╭──────────────┬──────────────────────────────────────────────────────────────────────────────────────╮ 185 | │ App ID │ langchain-dec14439a6 │ 186 | ├──────────────┼──────────────────────────────────────────────────────────────────────────────────────┤ 187 | │ Phase │ Serving │ 188 | ├──────────────┼──────────────────────────────────────────────────────────────────────────────────────┤ 189 | │ Endpoint │ https://langchain-dec14439a6.wolf.jina.ai │ 190 | ├──────────────┼──────────────────────────────────────────────────────────────────────────────────────┤ 191 | │ App logs │ dashboards.wolf.jina.ai │ 192 | ├──────────────┼──────────────────────────────────────────────────────────────────────────────────────┤ 193 | │ Swagger UI │ https://langchain-dec14439a6.wolf.jina.ai/docs │ 194 | ├──────────────┼──────────────────────────────────────────────────────────────────────────────────────┤ 195 | │ OpenAPI JSON │ https://langchain-dec14439a6.wolf.jina.ai/openapi.json │ 196 | ╰──────────────┴──────────────────────────────────────────────────────────────────────────────────────╯ 197 | ``` 198 |
199 | 200 | 201 | You can read more about langchain-serve [here](https://github.com/jina-ai/langchain-server). 202 | 203 | ## Exposing a Gradio chat-like interface 204 | 205 | You can expose a gradio UI for the bot using `create_interface` function. 206 | Assuming your file is called `ui.py` run `gradio qnabot/ui.py` to run the UI locally. 207 | You should then be able to visit `http://127.0.0.1:7860` to see the API documentation. 208 | 209 | ```python 210 | from megabots import bot, create_interface 211 | 212 | demo = create_interface(bot("qna-over-docs")) 213 | ``` 214 | 215 | ## Customising bot 216 | 217 | The `bot` function should serve as the starting point for creating and customising your bot. Below is a list of the available arguments in `bot`. 218 | 219 | | Argument | Description | 220 | | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 221 | | task | The type of bot to create. Available options: `qna-over-docs`. More comming soon | 222 | | index | Specifies the index to use for the bot. It can either be a saved index file (e.g., `index.pkl`) or a directory of documents (e.g., `./index`). In the case of the directory the index will be automatically created. If no index is specified `bot` will look for `index.pkl` or `./index` | 223 | | model | The name of the model to use for the bot. You can specify a different model by providing its name, like "text-davinci-003". Supported models: `gpt-3.5-turbo` (default),`text-davinci-003` More comming soon. | 224 | | prompt | A string template for the prompt, which defines the format of the question and context passed to the model. The template should include placeholder variables like so: `context`, `{question}` and in the case of using memory `history`. 
| 225 | | memory | The type of memory to be used by the bot. Can be a string with the type of the memory or you can use `memory` factory function. Supported memories: `conversation-buffer`, `conversation-buffer-window` | 226 | | vectorstore | The vectorstore to be used for the index. Can be a string with the name of the databse or you can use `vectorstore` factory function. Supported DBs: `milvus`. | 227 | 228 | | sources | When `sources` is `True` the bot will also include sources in the response. A known [issue](https://github.com/hwchase17/langchain/issues/2858) exists, where if you pass a custom prompt with sources the code breaks. | 229 | 230 | ## How QnA bot works 231 | 232 | Large language models (LLMs) are powerful, but they can't answer questions about documents they haven't seen. If you want to use an LLM to answer questions about documents it was not trained on, you have to give it information about those documents. To solve this, we use "retrieval augmented generation." 233 | 234 | In simple terms, when you have a question, you first search for relevant documents. Then, you give the documents and the question to the language model to generate an answer. To make this work, you need your documents in a searchable format (an index). This process involves two main steps: (1) preparing your documents for easy querying, and (2) using the retrieval augmented generation method. 235 | 236 | `qna-over-docs` uses FAISS to create an index of documents and GPT to generate answers. 
237 | 238 | ```mermaid 239 | sequenceDiagram 240 | actor User 241 | participant API 242 | participant LLM 243 | participant Vectorstore 244 | participant IngestionEngine 245 | participant DataLake 246 | autonumber 247 | 248 | Note over API, DataLake: Ingestion phase 249 | loop Every X time 250 | IngestionEngine ->> DataLake: Load documents 251 | DataLake -->> IngestionEngine: Return data 252 | IngestionEngine -->> IngestionEngine: Split documents and Create embeddings 253 | IngestionEngine ->> Vectorstore: Store documents and embeddings 254 | end 255 | 256 | Note over API, DataLake: Generation phase 257 | 258 | User ->> API: Receive user question 259 | API ->> Vectorstore: Lookup documents in the index relevant to the question 260 | API ->> API: Construct a prompt from the question and any relevant documents 261 | API ->> LLM: Pass the prompt to the model 262 | LLM -->> API: Get response from model 263 | API -->> User: Return response 264 | 265 | ``` 266 | 267 | ## How to contribute? 268 | 269 | We welcome any suggestions, problem reports, and contributions! 270 | For any changes you would like to make to this project, we invite you to submit an [issue](https://github.com/momegas/megabots/issues). 271 | 272 | For more information, see [`CONTRIBUTING`](https://github.com/momegas/megabots/blob/main/CONTRIBUTING.md) instructions. 273 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Use this section to tell people about which versions of your project are 6 | currently being supported with security updates. 7 | 8 | | Version | Supported | 9 | | ------- | ------------------ | 10 | | 0.0.x | :white_check_mark: | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Use this section to tell people how to report a vulnerability. 
15 | 16 | To report a vulnerability use [this link](https://github.com/momegas/megabots/issues/new/choose) to open a new bug 17 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is an example of what you can build with 🤖Megabots. 3 | It is hosted here: https://huggingface.co/spaces/momegas/megabots 4 | 5 | """ 6 | 7 | from megabots import bot, create_interface 8 | 9 | prompt = """ 10 | You are programming assistant that helps programmers develop apps with the Megabots library. 11 | Use the following pieces of context to answer the question at the end. 12 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 13 | If the question asks for python code you can provide it. 14 | 15 | Context: 16 | {context} 17 | 18 | Conversation history: 19 | {history} 20 | Human: {question} 21 | AI: 22 | """ 23 | 24 | qnabot = bot( 25 | "qna-over-docs", 26 | index="./examples/files", 27 | memory="conversation-buffer-window", 28 | prompt=prompt, 29 | ) 30 | 31 | 32 | text = """ 33 | You can ask this bot anything about 🤖Megabots. Here are some examples: 34 | - What is Megabots? 35 | - How can I create a bot? 36 | - How can I change the prompt? 37 | - How can I create a bot that has memory and can connect to a milvus vector database? 38 | - How can I customise the bot function? 39 | - How can I an API out of my bot? 40 | - How can I an intrface out of my bot? 41 | - Where can i find the megabots repo? 
42 | """ 43 | 44 | iface = create_interface(qnabot, text) 45 | iface.launch() 46 | -------------------------------------------------------------------------------- /example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Examples\n", 9 | "\n", 10 | "Below you can find some examples of how to use the 🤖 `Megabots` library.\n" 11 | ] 12 | }, 13 | { 14 | "attachments": {}, 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Creating a bot\n", 19 | "\n", 20 | "The `bot` object is the main object of the library. It is used to create a bot and to interact with it.\n", 21 | "\n", 22 | "The `index` argument specifies the index to use for the bot. It can either be a saved index file (e.g., `index.pkl`) or a directory of documents (e.g., `./index`). In the case of the directory the index will be automatically created. If no index is specified the `bot` will look for `index.pkl` or `./index`." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stderr", 32 | "output_type": "stream", 33 | "text": [ 34 | "/Users/momegas/Desktop/qnabot/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 35 | " from .autonotebook import tqdm as notebook_tqdm\n" 36 | ] 37 | }, 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "Using model: gpt-3.5-turbo\n", 43 | "Loading path from pickle file: ./index.pkl ...\n" 44 | ] 45 | }, 46 | { 47 | "data": { 48 | "text/plain": [ 49 | "'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'" 50 | ] 51 | }, 52 | "execution_count": 1, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "from megabots import bot\n", 59 | "\n", 60 | "qnabot = bot(\"qna-over-docs\", index=\"./index.pkl\")\n", 61 | "\n", 62 | "qnabot.ask(\"what was the first roster of the avengers?\")\n" 63 | ] 64 | }, 65 | { 66 | "attachments": {}, 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "### Changing the bot's prompt\n", 71 | "\n", 72 | "You can change the bot's promnpt to customize it to your needs. In the `qna-over-docs` type of bot you will need to pass 2 variables for the `context` (knwoledge searched from the index) and the `question` (the human question).\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 2, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "Using model: gpt-3.5-turbo\n", 85 | "Loading path from pickle file: ./index.pkl ...\n" 86 | ] 87 | }, 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'" 92 | ] 93 | }, 94 | "execution_count": 2, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "from megabots import bot\n", 101 | "\n", 102 | "prompt = \"\"\"\n", 103 | "Use the following pieces of context to answer the question at the end. 
\n", 104 | "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", 105 | "Answer in the style of Tony Stark.\n", 106 | "\n", 107 | "{context}\n", 108 | "\n", 109 | "Question: {question}\n", 110 | "Helpful humorous answer:\"\"\"\n", 111 | "\n", 112 | "qnabot = bot(\"qna-over-docs\", index=\"./index.pkl\", prompt=prompt)\n", 113 | "\n", 114 | "qnabot.ask(\"what was the first roster of the avengers?\")\n" 115 | ] 116 | }, 117 | { 118 | "attachments": {}, 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "### Using Megabots with Milvus\n", 123 | "\n", 124 | "Megabots `bot` can also use Milvus as a backend for its search engine. You can find an example of how to do it below.\n", 125 | "\n", 126 | "In order to run Milvus you need to follow [this guide](https://milvus.io/docs/example_code.md) to download a docker compose file and run it.\n", 127 | "The command is:\n", 128 | "\n", 129 | "```bash\n", 130 | "wget https://raw.githubusercontent.com/milvus-io/pymilvus/v2.2.7/examples/hello_milvus.py\n", 131 | "```\n", 132 | "\n", 133 | "You can then [install Attu](https://milvus.io/docs/attu_install-docker.md) as a management tool for Milvus\n" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 3, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "Using model: gpt-3.5-turbo\n" 146 | ] 147 | }, 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "'The first roster of the Avengers included Iron Man, Thor, Hulk, Ant-Man, and the Wasp.'" 152 | ] 153 | }, 154 | "execution_count": 3, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "from megabots import bot\n", 161 | "\n", 162 | "# Attach a vectorstore by passing the name of the database. 
Default port for milvus is 19530 and default host is localhost\n", 163 | "# Point it to your files directory so that it can index the files and add them to the vectorstore\n", 164 | "bot = bot(\"qna-over-docs\", index=\"./examples/files/\", vectorstore=\"milvus\")\n", 165 | "\n", 166 | "bot.ask(\"what was the first roster of the avengers?\")\n" 167 | ] 168 | }, 169 | { 170 | "attachments": {}, 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "Or use the `vectorstore` factory function for more customisation\n" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 4, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "name": "stdout", 184 | "output_type": "stream", 185 | "text": [ 186 | "Using model: gpt-3.5-turbo\n" 187 | ] 188 | } 189 | ], 190 | "source": [ 191 | "from megabots import bot, vectorstore\n", 192 | "\n", 193 | "milvus = vectorstore(\"milvus\", host=\"localhost\", port=19530)\n", 194 | "\n", 195 | "bot = bot(\"qna-over-docs\", index=\"./examples/files/\", vectorstore=milvus)\n" 196 | ] 197 | }, 198 | { 199 | "attachments": {}, 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "### Working with memory\n", 204 | "\n", 205 | "You can easily add memory to your `bot` using the `memory` parameter. It accepts a string with the type of the memory to be used. This defaults to some sane dafaults.\n", 206 | "Should you need more configuration, you can use the `memory` function and pass the type of memory and the configuration you need.\n" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 5, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "name": "stdout", 216 | "output_type": "stream", 217 | "text": [ 218 | "Using model: gpt-3.5-turbo\n", 219 | "Loading path from pickle file: ./index.pkl ...\n", 220 | "Iron Man is a superhero character who is a member of the Avengers. 
He is known for his high-tech suit of armor and his alter ego, Tony Stark.\n", 221 | "Yes, Iron Man was part of the original Avengers lineup.\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "from megabots import bot\n", 227 | "\n", 228 | "qnabot = bot(\"qna-over-docs\", index=\"./index.pkl\", memory=\"conversation-buffer\")\n", 229 | "\n", 230 | "print(qnabot.ask(\"who is iron man?\"))\n", 231 | "print(qnabot.ask(\"was he in the first roster?\"))\n" 232 | ] 233 | }, 234 | { 235 | "attachments": {}, 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "Or using the `memory`factory function" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 6, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "name": "stdout", 249 | "output_type": "stream", 250 | "text": [ 251 | "Using model: gpt-3.5-turbo\n", 252 | "Loading path from pickle file: ./index.pkl ...\n", 253 | "Iron Man is a superhero character who is a member of the Avengers. He is known for his high-tech suit of armor and his alter ego, Tony Stark.\n", 254 | "Yes, Iron Man was part of the original Avengers lineup.\n" 255 | ] 256 | } 257 | ], 258 | "source": [ 259 | "from megabots import bot, memory\n", 260 | "\n", 261 | "qnabot = bot(\n", 262 | " \"qna-over-docs\",\n", 263 | " index=\"./index.pkl\",\n", 264 | " memory=memory(\"conversation-buffer-window\", k=5),\n", 265 | ")\n", 266 | "\n", 267 | "print(qnabot.ask(\"who is iron man?\"))\n", 268 | "print(qnabot.ask(\"was he in the first roster?\"))\n" 269 | ] 270 | }, 271 | { 272 | "attachments": {}, 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "NOTE: For the `qna-over-docs` bot, when using memory and passing your custom prompt, it is important to remember to pass one more variable to your custom prompt to facilitate for chat history. 
The variable name is `history`.\n" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 7, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "Using model: gpt-3.5-turbo\n", 289 | "Loading path from pickle file: ./index.pkl ...\n", 290 | "Iron Man is a superhero character who is a member of the Avengers. He is a wealthy businessman named Tony Stark who uses his advanced technology to create a suit of armor that gives him superhuman abilities.\n", 291 | "Yes, Iron Man was part of the original Avengers lineup.\n" 292 | ] 293 | } 294 | ], 295 | "source": [ 296 | "from megabots import bot\n", 297 | "\n", 298 | "prompt = \"\"\"\n", 299 | "Use the following pieces of context to answer the question at the end. \n", 300 | "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n", 301 | "\n", 302 | "{context}\n", 303 | "\n", 304 | "{history}\n", 305 | "Human: {question}\n", 306 | "AI:\"\"\"\n", 307 | "\n", 308 | "qnabot = bot(\n", 309 | " \"qna-over-docs\",\n", 310 | " prompt=prompt,\n", 311 | " index=\"./index.pkl\",\n", 312 | " memory=\"conversation-buffer\",\n", 313 | ")\n", 314 | "\n", 315 | "print(qnabot.ask(\"who is iron man?\"))\n", 316 | "print(qnabot.ask(\"was he in the first roster?\"))" 317 | ] 318 | } 319 | ], 320 | "metadata": { 321 | "kernelspec": { 322 | "display_name": ".venv", 323 | "language": "python", 324 | "name": "python3" 325 | }, 326 | "language_info": { 327 | "codemirror_mode": { 328 | "name": "ipython", 329 | "version": 3 330 | }, 331 | "file_extension": ".py", 332 | "mimetype": "text/x-python", 333 | "name": "python", 334 | "nbconvert_exporter": "python", 335 | "pygments_lexer": "ipython3", 336 | "version": "3.10.0" 337 | }, 338 | "orig_nbformat": 4 339 | }, 340 | "nbformat": 4, 341 | "nbformat_minor": 2 342 | } 343 | -------------------------------------------------------------------------------- 
/examples/files/facts.txt: -------------------------------------------------------------------------------- 1 | # 🤖 Megabots 2 | 3 | author: Megaklis Vasilakis 4 | author_email: megaklis.vasilakis@gmail.com 5 | repo: https://github.com/momegas/megabots 6 | 7 | 🤖 Megabots provides State-of-the-art, production ready LLM apps made mega-easy, so you don't have to build them from scratch 🤯 Create a bot, now 🫵 8 | 9 | - 👉 Join us on Discord: https://discord.gg/zkqDWk5S7P 10 | - ✈️ Work is managed in this project: https://github.com/users/momegas/projects/5/views/2 11 | 12 | **The Megabots library can be used to create bots that:** 13 | 14 | - ⌚️ are production ready, in minutes 15 | - 🗂️ can answer questions over documents 16 | - 💾 can connect to vector databases 17 | - 🎖️ automatically expose the bot as a rebust API using FastAPI (early release) 18 | - 🏓 automatically expose the bot as a UI using Gradio 19 | 20 | **Coming soon:** 21 | 22 | - 🗣️ accept voice as an input using [whisper](https://github.com/openai/whisper) 23 | - 👍 validate and correct the outputs of LLMs using [guardrails](https://github.com/ShreyaR/guardrails) 24 | - 💰 semanticly cache LLM Queries and reduce Costs by 10x using [GPTCache](https://github.com/zilliztech/GPTCache) 25 | - 🏋️ mega-easy LLM training 26 | - 🚀 mega-easy deployment 27 | 28 | 🤖 Megabots is backed by some of the most famous tools for productionalising AI. It uses [LangChain](https://docs.langchain.com/docs/) for managing LLM chains, [FastAPI](https://fastapi.tiangolo.com/) to create a production ready API, [Gradio](https://gradio.app/) to create a UI. At the moment it uses [OpenAI](https://openai.com/) to generate answers, but we plan to support other LLMs in the future. 29 | 30 | ## Getting started 31 | 32 | Note: This is a work in progress. The API might change. 
33 | 34 | ```bash 35 | pip install megabots 36 | ``` 37 | 38 | ```python 39 | from megabots import bot 40 | import os 41 | 42 | os.environ["OPENAI_API_KEY"] = "my key" 43 | 44 | # Create a bot 👉 with one line of code. Automatically loads your data from ./index or index.pkl. 45 | # Keep in mind that you need to have one or another. 46 | qnabot = bot("qna-over-docs") 47 | 48 | # Ask a question 49 | answer = bot.ask("How do I use this bot?") 50 | 51 | # Save the index to save costs (GPT is used to create the index) 52 | bot.save_index("index.pkl") 53 | 54 | # Load the index from a previous run 55 | qnabot = bot("qna-over-docs", index="./index.pkl") 56 | 57 | # Or create the index from a directory of documents 58 | qnabot = bot("qna-over-docs", index="./index") 59 | 60 | # Change the model 61 | qnabot = bot("qna-over-docs", model="text-davinci-003") 62 | ``` 63 | 64 | ## Changing the bot's prompt 65 | 66 | You can change the bots promnpt to customize it to your needs. In the `qna-over-docs` type of bot you will need to pass 2 variables for the `context` (knwoledge searched from the index) and the `question` (the human question). 67 | 68 | ```python 69 | from megabots import bot 70 | 71 | prompt = """ 72 | Use the following pieces of context to answer the question at the end. 73 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 74 | Answer in the style of Tony Stark. 75 | 76 | {context} 77 | 78 | Question: {question} 79 | Helpful humorous answer:""" 80 | 81 | qnabot = bot("qna-over-docs", index="./index.pkl", prompt=prompt) 82 | 83 | qnabot.ask("what was the first roster of the avengers?") 84 | ``` 85 | 86 | ## Working with memory 87 | 88 | You can easily add memory to your `bot` using the `memory` parameter. It accepts a string with the type of the memory to be used. This defaults to some sane dafaults. 
89 | Should you need more configuration, you can use the `memory` function and pass the type of memory and the configuration you need. 90 | 91 | ```python 92 | from megabots import bot 93 | 94 | qnabot = bot("qna-over-docs", index="./index.pkl", memory="conversation-buffer") 95 | 96 | print(qnabot.ask("who is iron man?")) 97 | print(qnabot.ask("was he in the first roster?")) 98 | # Bot should understand who "he" refers to. 99 | ``` 100 | 101 | Or using the `memory`factory function 102 | 103 | ```python 104 | from megabots import bot, memory 105 | 106 | mem("conversation-buffer-window", k=5) 107 | 108 | qnabot = bot("qna-over-docs", index="./index.pkl", memory=mem) 109 | 110 | print(qnabot.ask("who is iron man?")) 111 | print(qnabot.ask("was he in the first roster?")) 112 | ``` 113 | 114 | NOTE: For the `qna-over-docs` bot, when using memory and passing your custom prompt, it is important to remember to pass one more variable to your custom prompt to facilitate for chat history. The variable name is `history`. 115 | 116 | ```python 117 | from megabots import bot 118 | 119 | prompt = """ 120 | Use the following pieces of context to answer the question at the end. 121 | If you don't know the answer, just say that you don't know, don't try to make up an answer. 122 | 123 | {context} 124 | 125 | {history} 126 | Human: {question} 127 | AI:""" 128 | 129 | qnabot = bot("qna-over-docs", prompt=prompt, index="./index.pkl", memory="conversation-buffer") 130 | 131 | print(qnabot.ask("who is iron man?")) 132 | print(qnabot.ask("was he in the first roster?")) 133 | ``` 134 | 135 | ## Using Megabots with Milvus (more DBs comming soon) 136 | 137 | Megabots `bot` can also use Milvus as a backend for its search engine. You can find an example of how to do it below. 138 | 139 | In order to run Milvus you need to follow [this guide](https://milvus.io/docs/example_code.md) to download a docker compose file and run it. 
140 | The command is: 141 | 142 | ```bash 143 | wget https://raw.githubusercontent.com/milvus-io/pymilvus/v2.2.7/examples/hello_milvus.py 144 | ``` 145 | 146 | You can then [install Attu](https://milvus.io/docs/attu_install-docker.md) as a management tool for Milvus 147 | 148 | ```python 149 | from megabots import bot 150 | 151 | # Attach a vectorstore by passing the name of the database. Default port for milvus is 19530 and default host is localhost 152 | # Point it to your files directory so that it can index the files and add them to the vectorstore 153 | bot = bot("qna-over-docs", index="./examples/files/", vectorstore="milvus") 154 | 155 | bot.ask("what was the first roster of the avengers?") 156 | ``` 157 | 158 | Or use the `vectorstore` factory function for more customisation 159 | 160 | ```python 161 | 162 | from megabots import bot, vectorstore 163 | 164 | milvus = vectorstore("milvus", host="localhost", port=19530) 165 | 166 | bot = bot("qna-over-docs", index="./examples/files/", vectorstore=milvus) 167 | ``` 168 | 169 | ## Exposing an API with FastAPI 170 | 171 | You can also create a FastAPI app that will expose the bot as an API using the create_app function. 172 | Assuming you file is called `main.py` run `uvicorn main:app --reload` to run the API locally. 173 | You should then be able to visit `http://localhost:8000/docs` to see the API documentation. 174 | 175 | ```python 176 | from megabots import bot, create_api 177 | 178 | app = create_app(bot("qna-over-docs")) 179 | ``` 180 | 181 | ## Exposing a Gradio chat-like interface 182 | 183 | You can expose a gradio UI for the bot using `create_interface` function. 184 | Assuming your file is called `ui.py` run `gradio qnabot/ui.py` to run the UI locally. 185 | You should then be able to visit `http://127.0.0.1:7860` to see the API documentation. 
186 | 187 | ```python 188 | from megabots import bot, create_interface 189 | 190 | demo = create_interface(bot("qna-over-docs")) 191 | ``` 192 | 193 | ## Customising bot 194 | 195 | The `bot` function should serve as the starting point for creating and customising your bot. Below is a list of the available arguments in `bot`. 196 | 197 | | Argument | Description | 198 | | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 199 | | task | The type of bot to create. Available options: `qna-over-docs`. More comming soon | 200 | | index | Specifies the index to use for the bot. It can either be a saved index file (e.g., `index.pkl`) or a directory of documents (e.g., `./index`). In the case of the directory the index will be automatically created. If no index is specified `bot` will look for `index.pkl` or `./index` | 201 | | model | The name of the model to use for the bot. You can specify a different model by providing its name, like "text-davinci-003". Supported models: `gpt-3.5-turbo` (default),`text-davinci-003` More comming soon. | 202 | | prompt | A string template for the prompt, which defines the format of the question and context passed to the model. The template should include placeholder variables like so: `context`, `{question}` and in the case of using memory `history`. | 203 | | memory | The type of memory to be used by the bot. Can be a string with the type of the memory or you can use `memory` factory function. Supported memories: `conversation-buffer`, `conversation-buffer-window` | 204 | | vectorstore | The vectorstore to be used for the index. Can be a string with the name of the databse or you can use `vectorstore` factory function. Supported DBs: `milvus`. 
| 205 | 206 | | sources | When `sources` is `True` the bot will also include sources in the response. A known [issue](https://github.com/hwchase17/langchain/issues/2858) exists, where if you pass a custom prompt with sources the code breaks. | 207 | 208 | ## How QnA bot works 209 | 210 | Large language models (LLMs) are powerful, but they can't answer questions about documents they haven't seen. If you want to use an LLM to answer questions about documents it was not trained on, you have to give it information about those documents. To solve this, we use "retrieval augmented generation." 211 | 212 | In simple terms, when you have a question, you first search for relevant documents. Then, you give the documents and the question to the language model to generate an answer. To make this work, you need your documents in a searchable format (an index). This process involves two main steps: (1) preparing your documents for easy querying, and (2) using the retrieval augmented generation method. 213 | 214 | `qna-over-docs` uses FAISS to create an index of documents and GPT to generate answers. 215 | 216 | ## How to contribute? 217 | 218 | We welcome any suggestions, problem reports, and contributions! 219 | For any changes you would like to make to this project, we invite you to submit an [issue](https://github.com/momegas/megabots/issues). 220 | 221 | For more information, see [`CONTRIBUTING`](https://github.com/momegas/megabots/blob/main/CONTRIBUTING.md) instructions. 
222 | -------------------------------------------------------------------------------- /megabots/__init__.py: -------------------------------------------------------------------------------- 1 | from megabots.vectorstore import VectorStore, vectorstore 2 | from megabots.memory import Memory, memory 3 | from megabots.bot import Bot, bot 4 | from megabots.prompt import prompt 5 | from megabots.utils import create_api, create_interface 6 | 7 | 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | -------------------------------------------------------------------------------- /megabots/api.py: -------------------------------------------------------------------------------- 1 | import os 2 | from megabots import bot 3 | from megabots.utils import create_api 4 | 5 | # from lcserve import serving 6 | 7 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 8 | index_dir = os.path.join(cur_dir, "..", "examples", "files") 9 | 10 | 11 | mybot = bot("qna-over-docs", index="./index.pkl") 12 | 13 | 14 | # @serving 15 | # def ask(question: str) -> str: 16 | # return mybot.ask(question) 17 | 18 | 19 | app = create_api(mybot) 20 | -------------------------------------------------------------------------------- /megabots/bot.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from langchain.llms import OpenAI 3 | from langchain.chat_models import ChatOpenAI 4 | from langchain.embeddings import OpenAIEmbeddings 5 | from langchain.chains.qa_with_sources import load_qa_with_sources_chain 6 | from langchain.vectorstores.faiss import FAISS 7 | import pickle 8 | import os 9 | from langchain.prompts import PromptTemplate 10 | from langchain.chains.question_answering import load_qa_chain 11 | from langchain.chains.conversational_retrieval.prompts import QA_PROMPT 12 | from langchain.document_loaders import DirectoryLoader 13 | from megabots.prompt import QA_MEMORY_PROMPT 14 | from megabots.vectorstore import 
VectorStore 15 | from megabots.memory import Memory 16 | import megabots 17 | 18 | 19 | class Bot: 20 | def __init__( 21 | self, 22 | model: str | None = None, 23 | prompt: PromptTemplate | None = None, 24 | index: str | None = None, 25 | sources: bool | None = False, 26 | vectorstore: VectorStore | None = None, 27 | memory: Memory | None = None, 28 | verbose: bool = False, 29 | temperature: int = 0, 30 | ): 31 | self.vectorstore = vectorstore 32 | self.memory = memory 33 | self.prompt = prompt or QA_MEMORY_PROMPT if self.memory else QA_PROMPT 34 | self.select_model(model, temperature) 35 | self.create_loader(index) 36 | self.load_or_create_index(index, vectorstore) 37 | 38 | # Load the question-answering chain for the selected model 39 | self.chain = self.create_chain(sources=sources, verbose=verbose) 40 | 41 | def create_chain( 42 | self, 43 | sources: bool | None = False, 44 | verbose: bool = False, 45 | ): 46 | # TODO: Changing the prompt here is not working. Leave it as is for now. 
47 | # Reference: https://github.com/hwchase17/langchain/issues/2858 48 | if sources: 49 | return load_qa_with_sources_chain( 50 | self.llm, 51 | chain_type="stuff", 52 | memory=self.memory.memory if self.memory else None, 53 | verbose=verbose, 54 | ) 55 | return load_qa_chain( 56 | self.llm, 57 | chain_type="stuff", 58 | verbose=verbose, 59 | prompt=self.prompt, 60 | memory=self.memory.memory if self.memory else None, 61 | ) 62 | 63 | def select_model(self, model: str | None, temperature: float): 64 | # Select and set the appropriate model based on the provided input 65 | if model is None or model == "gpt-3.5-turbo": 66 | print("Using model: gpt-3.5-turbo") 67 | self.llm = ChatOpenAI(temperature=temperature) 68 | 69 | if model == "text-davinci-003": 70 | print("Using model: text-davinci-003") 71 | self.llm = OpenAI(temperature=temperature) 72 | 73 | def create_loader(self, index: str | None): 74 | # Create a loader based on the provided directory (either local or S3) 75 | if index is None: 76 | raise RuntimeError( 77 | """ 78 | Impossible to find a valid index. 79 | Either provide a valid path to a pickle file or a directory. 
80 | """ 81 | ) 82 | self.loader = DirectoryLoader(index, recursive=True) 83 | 84 | def load_or_create_index(self, index: str, vectorstore: VectorStore | None = None): 85 | # Load an existing index from disk or create a new one if not available 86 | if vectorstore is not None: 87 | self.search_index = vectorstore.client.from_documents( 88 | self.loader.load_and_split(), 89 | OpenAIEmbeddings(), 90 | connection_args={"host": vectorstore.host, "port": vectorstore.port}, 91 | ) 92 | return 93 | 94 | # Is pickle 95 | if index is not None and "pkl" in index or "pickle" in index: 96 | print("Loading path from pickle file: ", index, "...") 97 | with open(index, "rb") as f: 98 | self.search_index = pickle.load(f) 99 | return 100 | 101 | # Is directory 102 | if index is not None and os.path.isdir(index): 103 | print("Creating index...") 104 | self.search_index = FAISS.from_documents( 105 | self.loader.load_and_split(), OpenAIEmbeddings() 106 | ) 107 | return 108 | 109 | raise RuntimeError( 110 | """ 111 | Impossible to find a valid index. 112 | Either provide a valid path to a pickle file or a directory. 
113 | """ 114 | ) 115 | 116 | def save_index(self, index_path: str): 117 | # Save the index to the specified path 118 | with open(index_path, "wb") as f: 119 | pickle.dump(self.search_index, f) 120 | 121 | def ask(self, question: str, k=1) -> str: 122 | # Retrieve the answer to the given question and return it 123 | input_documents = self.search_index.similarity_search(question, k=k) 124 | answer = self.chain.run(input_documents=input_documents, question=question) 125 | return answer 126 | 127 | 128 | SUPPORTED_TASKS = { 129 | "qna-over-docs": { 130 | "impl": Bot, 131 | "default": { 132 | "model": "gpt-3.5-turbo", 133 | "temperature": 0, 134 | "index": "./index", 135 | "input_variables": ["context", "question"], 136 | }, 137 | } 138 | } 139 | 140 | SUPPORTED_MODELS = {} 141 | 142 | 143 | def bot( 144 | task: str | None = None, 145 | *, 146 | model: str | None = None, 147 | index: str | None = None, 148 | prompt: str | None = None, 149 | memory: str | Memory | None = None, 150 | vectorstore: str | VectorStore | None = None, 151 | verbose: bool = False, 152 | temperature: int = 0, 153 | ) -> Bot: 154 | """Instanciate a bot based on the provided task. Each supported tasks has it's own default sane defaults. 155 | 156 | Args: 157 | task (str | None, optional): The given task. Can be one of the SUPPORTED_TASKS. 158 | 159 | model (str | None, optional): Model to be used. Can be one of the SUPPORTED_MODELS. 160 | 161 | index (str | None, optional): Data that the model will load and store index info. 162 | Can be either a local file path, a pickle file, or a url of a vector database. 163 | By default it will look for a local directory called "files" in the current working directory. 164 | 165 | prompt (str | None, optional): The prompt that the bot will take in. Mark variables like this: {variable}. 166 | Variables are context, question, and history if the bot has memory. 
167 | 168 | vectorstore: (str | VectorStore | None, optional): The vectorstore that the bot will save the index to. 169 | If only a string is passed, the defaults values willl be used. 170 | 171 | verbose (bool, optional): Verbocity. Defaults to False. 172 | 173 | temperature (int, optional): Temperature. Defaults to 0. 174 | 175 | Raises: 176 | RuntimeError: _description_ 177 | ValueError: _description_ 178 | 179 | Returns: 180 | Bot: Bot instance 181 | """ 182 | 183 | if task is None: 184 | raise RuntimeError("Impossible to instantiate a bot without a task.") 185 | if task not in SUPPORTED_TASKS: 186 | raise ValueError(f"Task {task} is not supported.") 187 | 188 | task_defaults = SUPPORTED_TASKS[task]["default"] 189 | 190 | if memory is not None: 191 | task_defaults["input_variables"].append("history") 192 | 193 | return SUPPORTED_TASKS[task]["impl"]( 194 | model=model or task_defaults["model"], 195 | index=index or task_defaults["index"], 196 | prompt=None 197 | if prompt is None 198 | else PromptTemplate( 199 | template=prompt, input_variables=task_defaults["input_variables"] 200 | ), 201 | temperature=temperature, 202 | verbose=verbose, 203 | vectorstore=megabots.vectorstore(vectorstore) 204 | if isinstance(vectorstore, str) 205 | else vectorstore, 206 | memory=megabots.memory(memory) if isinstance(memory, str) else memory, 207 | ) 208 | -------------------------------------------------------------------------------- /megabots/memory.py: -------------------------------------------------------------------------------- 1 | from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory 2 | 3 | 4 | class ConversationBuffer: 5 | def __init__(self): 6 | self.memory = ConversationBufferMemory(input_key="question") 7 | 8 | 9 | class ConversationBufferWindow: 10 | def __init__(self, k: int): 11 | self.k: int = k 12 | self.memory = ConversationBufferWindowMemory(k=self.k, input_key="question") 13 | 14 | 15 | SUPPORTED_MEMORY = { 16 | 
"conversation-buffer": { 17 | "impl": ConversationBuffer, 18 | "default": {}, 19 | }, 20 | "conversation-buffer-window": { 21 | "impl": ConversationBufferWindow, 22 | "default": {"k": 3}, 23 | }, 24 | } 25 | 26 | 27 | Memory = type("Memory", (ConversationBuffer, ConversationBufferWindow), {}) 28 | 29 | 30 | def memory( 31 | name: str = "conversation-buffer-window", 32 | k: int | None = None, 33 | ) -> Memory: 34 | if name is None: 35 | raise RuntimeError("Impossible to instantiate memory without a name.") 36 | 37 | if name not in SUPPORTED_MEMORY: 38 | raise ValueError(f"Memory {name} is not supported.") 39 | 40 | cl = SUPPORTED_MEMORY[name]["impl"] 41 | 42 | if name == "conversation-buffer-window": 43 | return cl(k=k or SUPPORTED_MEMORY[name]["default"]["k"]) 44 | 45 | return SUPPORTED_MEMORY[name]["impl"]() 46 | -------------------------------------------------------------------------------- /megabots/prompt.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from langchain import PromptTemplate 3 | 4 | QNA_TEMPLATE = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 
5 | 6 | {context} 7 | 8 | {history} 9 | Human: {question} 10 | AI:""" 11 | 12 | QA_MEMORY_PROMPT = PromptTemplate( 13 | template=QNA_TEMPLATE, input_variables=["context", "history", "question"] 14 | ) 15 | 16 | 17 | def prompt(template: str, variables: List[str]) -> PromptTemplate: 17 | """Build a PromptTemplate from a template string and its input variable names.""" 18 | return PromptTemplate(template=template, input_variables=variables) 19 | -------------------------------------------------------------------------------- /megabots/utils.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from fastapi import FastAPI 3 | from megabots.bot import Bot 4 | from fastapi.openapi.utils import get_openapi 5 | from pydantic import BaseModel 6 | 7 | 8 | def _custom_openapi(app: FastAPI, version: str): 8 | """Return the app's cached OpenAPI schema if present, otherwise generate one via get_openapi.""" 9 | if app.openapi_schema: 10 | return app.openapi_schema 11 | 12 | openapi_schema = get_openapi( 13 | title="🤖 Megabots API", 14 | version=version, 15 | description="Use this API to interact with the bot.", 16 | routes=app.routes, 17 | ) 18 | return openapi_schema 19 | 20 | 21 | class Answer(BaseModel): 22 | text: str 23 | 24 | 25 | def create_api(bot: Bot, version: str = "0.0.1"): 25 | """Create a FastAPI app exposing the bot's ask() at GET /v1/bot/ask/{question}.""" 26 | app = FastAPI() 27 | 28 | @app.get( 29 | "/v1/bot/ask/{question}", 30 | tags=["Bot"], 31 | summary="Ask bot", 32 | description="Send question to the bot.", 33 | responses={200: {"description": "Bot answer"}}, 34 | response_model=Answer, 35 | ) 36 | async def ask(question: str) -> Answer: 37 | answer = bot.ask(question) 38 | return Answer(text=answer) 39 | 40 | app.openapi_schema = _custom_openapi(app, version) 41 | 42 | return app 43 | 44 | 45 | def create_interface(bot_instance: Bot, markdown: str = ""): 45 | """Build a Gradio Blocks chat interface wired to bot_instance.ask.""" 46 | with gr.Blocks() as interface: 47 | gr.Markdown(markdown) 48 | chatbot = gr.Chatbot([], elem_id="chatbot").style(height=450) 49 | msg = gr.Textbox( 50 | show_label=False, 51 | placeholder="Enter text and press enter", 52 | ).style(container=False) 53 | 54 | # Clear the textbox and append the user's message (with no reply yet) to the history. 54 | def user(user_message, history): 55 | return "", history + [[user_message, 
None]] 56 | 57 | def bot(history): 58 | print("im here") 59 | response = bot_instance.ask(history[-1][0]) 60 | history[-1][1] = response 61 | return history 62 | 63 | msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then( 64 | bot, chatbot, chatbot 65 | ) 66 | 67 | return interface 68 | -------------------------------------------------------------------------------- /megabots/vectorstore.py: -------------------------------------------------------------------------------- 1 | from typing import Type, TypeVar 2 | from langchain.vectorstores import Milvus 3 | from abc import ABC 4 | 5 | 6 | class MilvusVectorStore: 7 | def __init__(self, host: str, port: int): 8 | self.host = host 9 | self.port = port 10 | self.client = Milvus 11 | 12 | 13 | class ChromaVectorStore: 14 | pass 15 | 16 | 17 | # Generic type variable for all vectorstores 18 | VectorStore = type("VectorStore", (MilvusVectorStore, ChromaVectorStore), {}) 19 | 20 | 21 | SUPPORTED_VECTORSTORES = { 22 | "milvus": { 23 | "impl": MilvusVectorStore, 24 | "default": {"host": "localhost", "port": 19530}, 25 | } 26 | } 27 | 28 | 29 | def vectorstore( 30 | name: str, host: str | None = None, port: int | None = None 31 | ) -> VectorStore: 32 | """Return a vectorstore object.""" 33 | 34 | if name is None: 35 | raise RuntimeError("Impossible to instantiate a vectorstore without a name.") 36 | 37 | if name not in SUPPORTED_VECTORSTORES: 38 | raise ValueError(f"Vectorstore {name} is not supported.") 39 | 40 | return SUPPORTED_VECTORSTORES[name]["impl"]( 41 | host=host or SUPPORTED_VECTORSTORES[name]["default"]["host"], 42 | port=port or SUPPORTED_VECTORSTORES[name]["default"]["port"], 43 | ) 44 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu>=1.7 2 | fastapi>=0.95 3 | gradio==3.26.0 4 | gradio_client>=0.1 5 | httpx==0.23 6 | langchain==0.0.137 7 | openai>=0.27 8 
| pytest>=7.3 9 | rich 10 | unstructured>=0.5 11 | python-dotenv==1.0.0 12 | pdfminer.six 13 | tiktoken==0.3.3 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | VERSION = "0.0.11" 4 | 5 | setup( 6 | name="megabots", 7 | version=VERSION, 8 | packages=find_packages(), 9 | install_requires=[ 10 | "langchain", 11 | "tiktoken", 12 | "unstructured", 13 | "fastapi", 14 | "faiss-cpu", 15 | "pdfminer.six", 16 | "gradio", 17 | "python-dotenv", 18 | "openai", 19 | "langchain-serve", 20 | ], 21 | author="Megaklis Vasilakis", 22 | author_email="megaklis.vasilakis@gmail.com", 23 | description="🤖 Megabots provides State-of-the-art, production ready bots made mega-easy, so you don't have to build them from scratch 🤯 Create a bot, now 🫵", 24 | long_description=open("README.md").read(), 25 | long_description_content_type="text/markdown", 26 | url="https://github.com/momegas/megabots", 27 | keywords=[ 28 | "bot", 29 | "qna-bot", 30 | "information-retrieval", 31 | "chatbot", 32 | "question-answering", 33 | "prompt-engineering", 34 | ], 35 | license="MIT", 36 | classifiers=[ 37 | # Choose appropriate classifiers from 38 | # https://pypi.org/classifiers/ 39 | "Development Status :: 4 - Beta" 40 | ], 41 | ) 42 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/momegas/megabots/55914acc023069a5a9e921e96aeb99d1c9a97f18/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_api.py: -------------------------------------------------------------------------------- 1 | # def _ignore_warnings(): 2 | # import logging 3 | # import warnings 4 | 5 | # logging.captureWarnings(True) 6 | # 
warnings.filterwarnings( 7 | # "ignore", 8 | # category=DeprecationWarning, 9 | # message="Deprecated call to `pkg_resources.declare_namespace('google')`.", 10 | # ) 11 | 12 | 13 | # _ignore_warnings() 14 | 15 | # import os 16 | # import signal 17 | # import subprocess 18 | 19 | # import requests 20 | # from requests.adapters import HTTPAdapter, Retry 21 | 22 | # megabot_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 23 | 24 | 25 | # def _session_with_retry() -> requests.Session: 26 | # s = requests.Session() 27 | # retries = Retry( 28 | # total=50, backoff_factor=1, status_forcelist=[404, 500, 502, 503, 504] 29 | # ) 30 | # s.mount("http://", HTTPAdapter(max_retries=retries)) 31 | # return s 32 | 33 | 34 | # class LCServeLocally: 35 | # def __init__(self, port: int = 8000): 36 | # self.port = port 37 | # self.command = " ".join( 38 | # [ 39 | # "lc-serve", 40 | # "deploy", 41 | # "local", 42 | # "megabots.api", 43 | # "--port", 44 | # str(self.port), 45 | # ] 46 | # ) 47 | 48 | # def __enter__(self): 49 | # self.p = subprocess.Popen( 50 | # self.command, cwd=megabot_dir, shell=True, preexec_fn=os.setsid 51 | # ) 52 | 53 | # def __exit__(self, exc_type, exc_val, exc_tb): 54 | # self.p.terminate() 55 | # os.killpg(os.getpgid(self.p.pid), signal.SIGTERM) 56 | 57 | 58 | # def test_lcserve_successful(): 59 | # port = 8000 60 | # lcserve_host = f"http://localhost:{port}" 61 | 62 | # with LCServeLocally(port=port): 63 | # resp = _session_with_retry().post( 64 | # url=f"{lcserve_host}/ask", 65 | # json={"question": "What is your name?"}, 66 | # ) 67 | # assert resp.status_code == 200 68 | # assert "result" in resp.json() 69 | # assert isinstance(resp.json()["result"], str) 70 | 71 | 72 | # def test_lcserve_invalid_request(): 73 | # port = 8000 74 | # lcserve_host = f"http://localhost:{port}" 75 | 76 | # with LCServeLocally(port=port): 77 | # resp = _session_with_retry().post( 78 | # url=f"{lcserve_host}/ask", 79 | # json={"foo": "bar"}, 80 | # ) 81 | # 
assert resp.status_code == 422 82 | # assert "detail" in resp.json() 83 | # assert resp.json()["detail"] == [ 84 | # { 85 | # "loc": ["body", "question"], 86 | # "msg": "field required", 87 | # "type": "value_error.missing", 88 | # } 89 | # ] 90 | -------------------------------------------------------------------------------- /tests/test_bots.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from megabots import bot 4 | import pickle 5 | from langchain.vectorstores.faiss import FAISS 6 | 7 | 8 | # Define test data 9 | test_directory = "./examples/files" 10 | test_question = "what is megabots?" 11 | correct_answer = "state-of-the-art, production" 12 | sources = "SOURCES:" 13 | 14 | 15 | def test_ask(): 16 | qnabot = bot("qna-over-docs", index=test_directory) 17 | answer = qnabot.ask(test_question) 18 | 19 | print(answer) 20 | 21 | # Assert that the answer contains the correct answer 22 | assert correct_answer in answer 23 | # Assert that the answer does not contain the sources 24 | assert sources not in answer 25 | 26 | 27 | def test_save_load_index(): 28 | # Create a temporary directory and file path for the test index 29 | with tempfile.TemporaryDirectory() as temp_dir: 30 | index_path = os.path.join(temp_dir, "test_index.pkl") 31 | 32 | # Create a bot and save the index to the temporary file path 33 | qnabot = bot("qna-over-docs", index=test_directory) 34 | qnabot.save_index(index_path) 35 | 36 | # Load the saved index and assert that it is the same as the original index 37 | with open(index_path, "rb") as f: 38 | saved_index = pickle.load(f) 39 | assert isinstance(saved_index, FAISS) 40 | 41 | bot_with_predefined_index = bot("qna-over-docs", index=index_path) 42 | 43 | # Assert that the bot returns the correct answer to the test question 44 | assert correct_answer in bot_with_predefined_index.ask(test_question) 45 | -------------------------------------------------------------------------------- 
/tests/test_memory.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | from megabots import memory 3 | from megabots.memory import ConversationBuffer, ConversationBufferWindow 4 | 5 | 6 | def test_memory_conversation_buffer(): 7 | mem = memory(name="conversation-buffer") 8 | assert isinstance(mem, ConversationBuffer) 9 | 10 | 11 | def test_memory_conversation_buffer_window(): 12 | mem = memory(name="conversation-buffer-window", k=10) 13 | assert isinstance(mem, ConversationBufferWindow) 14 | 15 | 16 | def test_memory_unsupported_name(): 17 | with raises(ValueError, match=r"Memory invalid-name is not supported."): 18 | memory(name="invalid-name") 19 | 20 | 21 | def test_memory_no_name(): 22 | with raises( 23 | RuntimeError, match=r"Impossible to instantiate memory without a name." 24 | ): 25 | memory(name=None) 26 | -------------------------------------------------------------------------------- /tests/test_ui.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from megabots import create_interface 3 | 4 | 5 | def test_create_interface(): 6 | # create a mock Bot object 7 | class MockBot: 8 | def ask(self, question: str): 9 | return "Answer" 10 | 11 | markdown = "test" 12 | 13 | # call the function with the mock bot and markdown 14 | interface = create_interface(MockBot(), markdown=markdown) 15 | 16 | # check if the interface has the correct properties 17 | assert isinstance(interface, gr.Blocks) 18 | --------------------------------------------------------------------------------