├── .dockerignore ├── .editorconfig ├── .flake8 ├── .github ├── FUNDING.yml └── workflows │ └── github-actions.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pyup.yml ├── 3.20.2 ├── ACKNOWLEDGEMENT.md ├── CONTRIBUTING.md ├── Dockerfile ├── Dockerfile_hpu ├── LICENSE ├── README.md ├── SOURCE_DOCUMENTS └── Orca_paper.pdf ├── constants.py ├── crawl.py ├── gaudi_utils ├── embeddings.py └── pipeline.py ├── ingest.py ├── load_models.py ├── localGPTUI ├── localGPTUI.py ├── static │ ├── dependencies │ │ ├── bootstrap-5.1.3-dist │ │ │ ├── css │ │ │ │ ├── bootstrap-grid.css │ │ │ │ ├── bootstrap-grid.css.map │ │ │ │ ├── bootstrap-grid.min.css │ │ │ │ ├── bootstrap-grid.min.css.map │ │ │ │ ├── bootstrap-grid.rtl.css │ │ │ │ ├── bootstrap-grid.rtl.css.map │ │ │ │ ├── bootstrap-grid.rtl.min.css │ │ │ │ ├── bootstrap-grid.rtl.min.css.map │ │ │ │ ├── bootstrap-reboot.css │ │ │ │ ├── bootstrap-reboot.css.map │ │ │ │ ├── bootstrap-reboot.min.css │ │ │ │ ├── bootstrap-reboot.min.css.map │ │ │ │ ├── bootstrap-reboot.rtl.css │ │ │ │ ├── bootstrap-reboot.rtl.css.map │ │ │ │ ├── bootstrap-reboot.rtl.min.css │ │ │ │ ├── bootstrap-reboot.rtl.min.css.map │ │ │ │ ├── bootstrap-utilities.css │ │ │ │ ├── bootstrap-utilities.css.map │ │ │ │ ├── bootstrap-utilities.min.css │ │ │ │ ├── bootstrap-utilities.min.css.map │ │ │ │ ├── bootstrap-utilities.rtl.css │ │ │ │ ├── bootstrap-utilities.rtl.css.map │ │ │ │ ├── bootstrap-utilities.rtl.min.css │ │ │ │ ├── bootstrap-utilities.rtl.min.css.map │ │ │ │ ├── bootstrap.css │ │ │ │ ├── bootstrap.css.map │ │ │ │ ├── bootstrap.min.css │ │ │ │ ├── bootstrap.min.css.map │ │ │ │ ├── bootstrap.rtl.css │ │ │ │ ├── bootstrap.rtl.css.map │ │ │ │ ├── bootstrap.rtl.min.css │ │ │ │ └── bootstrap.rtl.min.css.map │ │ │ └── js │ │ │ │ ├── bootstrap.bundle.js │ │ │ │ ├── bootstrap.bundle.js.map │ │ │ │ ├── bootstrap.bundle.min.js │ │ │ │ ├── bootstrap.bundle.min.js.map │ │ │ │ ├── bootstrap.esm.js │ │ │ │ ├── bootstrap.esm.js.map │ │ │ │ ├── bootstrap.esm.min.js │ │ │ │ ├── bootstrap.esm.min.js.map │ │ │ │ ├── bootstrap.js │ │ │ │ ├── bootstrap.js.map │ │ │ │ ├── bootstrap.min.js │ │ │ │ ├── bootstrap.min.js.map │ │ │ │ └── jquery-3.2.1.min.js │ │ └── jquery │ │ │ └── 3.6.0 │ │ │ └── jquery.min.js │ ├── document_examples │ │ ├── constitution.pdf │ │ └── news_articles.zip │ └── social_icons │ │ └── favicon.png └── templates │ └── home.html ├── localGPT_UI.py ├── prompt_template_utils.py ├── pyproject.toml ├── requirements.txt ├── run_localGPT.py ├── run_localGPT_API.py └── utils.py /.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !*.py 3 | !requirements.txt 4 | !SOURCE_DOCUMENTS 5 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | charset = utf-8 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.{py,rst,ini}] 12 | indent_style = space 13 | indent_size = 4 14 | 15 | [*.{html,css,scss,json,yml,xml}] 16 | indent_style = space 17 | indent_size = 2 18 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = docs 3 | max-line-length = 119 4 | extend-ignore = E203 5 | -------------------------------------------------------------------------------- 
/.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: promptengineering # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.github/workflows/github-actions.yml: -------------------------------------------------------------------------------- 1 | on: [push] 2 | 3 | jobs: 4 | precommit: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Check out repository code 8 | uses: actions/checkout@v3 9 | - name: Cache Pre-Commit 10 | uses: actions/cache@v3 11 | with: 12 | path: ~/.cache/pre-commit 13 | key: ${{ runner.os }}-pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} 14 | restore-keys: | 15 | ${{ runner.os }}-pre-commit-pip 16 | - name: Install pre-commit 17 | run: pip install -q pre-commit 18 | - name: Run pre-commit 19 | run: pre-commit run --show-diff-on-failure --color=always --all-files 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore vscode 2 | /.vscode 3 | /DB 4 | /models 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # poetry 103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 104 | # This is especially recommended for binary packages to ensure reproducibility, and is more 105 | # commonly ignored for libraries. 106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 107 | #poetry.lock 108 | 109 | # pdm 110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 111 | #pdm.lock 112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 113 | # in version control. 114 | # https://pdm.fming.dev/#use-with-ide 115 | .pdm.toml 116 | 117 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 118 | __pypackages__/ 119 | 120 | # Celery stuff 121 | celerybeat-schedule 122 | celerybeat.pid 123 | 124 | # SageMath parsed files 125 | *.sage.py 126 | 127 | # Environments 128 | .env 129 | .venv 130 | env/ 131 | venv/ 132 | ENV/ 133 | env.bak/ 134 | venv.bak/ 135 | 136 | # Spyder project settings 137 | .spyderproject 138 | .spyproject 139 | 140 | # Rope project settings 141 | .ropeproject 142 | 143 | # mkdocs documentation 144 | /site 145 | 146 | # mypy 147 | .mypy_cache/ 148 | .dmypy.json 149 | dmypy.json 150 | 151 | # Pyre type checker 152 | .pyre/ 153 | 154 | # pytype static type analyzer 155 | .pytype/ 156 | 157 | # Cython debug symbols 158 | cython_debug/ 159 | 160 | # PyCharm 161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 163 | # and can be added to the global gitignore or merged into this file. For a more nuclear 164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
165 | .idea/ 166 | 167 | #MacOS 168 | .DS_Store 169 | SOURCE_DOCUMENTS/.DS_Store -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_stages: [commit] 2 | 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.4.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-json 10 | - id: check-toml 11 | - id: check-xml 12 | - id: check-yaml 13 | - id: debug-statements 14 | - id: check-builtin-literals 15 | - id: check-case-conflict 16 | - id: detect-private-key 17 | 18 | - repo: https://github.com/pre-commit/mirrors-prettier 19 | rev: "v3.0.0-alpha.9-for-vscode" 20 | hooks: 21 | - id: prettier 22 | args: ["--tab-width", "2"] 23 | 24 | - repo: https://github.com/asottile/pyupgrade 25 | rev: v3.4.0 26 | hooks: 27 | - id: pyupgrade 28 | args: [--py311-plus] 29 | exclude: hooks/ 30 | 31 | - repo: https://github.com/psf/black 32 | rev: 23.3.0 33 | hooks: 34 | - id: black 35 | 36 | - repo: https://github.com/PyCQA/isort 37 | rev: 5.12.0 38 | hooks: 39 | - id: isort 40 | 41 | - repo: https://github.com/PyCQA/flake8 42 | rev: 6.0.0 43 | hooks: 44 | - id: flake8 45 | 46 | ci: 47 | autoupdate_schedule: weekly 48 | skip: [] 49 | submodules: false 50 | -------------------------------------------------------------------------------- /.pyup.yml: -------------------------------------------------------------------------------- 1 | # configure updates globally 2 | # default: all 3 | # allowed: all, insecure, False 4 | update: all 5 | 6 | # configure dependency pinning globally 7 | # default: True 8 | # allowed: True, False 9 | pin: True 10 | 11 | # add a label to pull requests, default is not set 12 | # requires private repo permissions, even on public repos 13 | # default: empty 14 | label_prs: update 15 | 16 | requirements: 17 | - "requirements.txt" 18 | -------------------------------------------------------------------------------- /3.20.2: -------------------------------------------------------------------------------- 1 | Requirement already satisfied: protobuf in c:\users\kevin\anaconda3\lib\site-packages (4.24.4) 2 | -------------------------------------------------------------------------------- /ACKNOWLEDGEMENT.md: -------------------------------------------------------------------------------- 1 | # Acknowledgments 2 | 3 | Some code was taken or inspired from other projects:- 4 | 5 | - [CookieCutter Django][cookiecutter-django] 6 | - `pre-commit-config.yaml` is taken from there with almost no changes 7 | - `github-actions.yml` is inspired by `gitlab-ci.yml` 8 | - `.pyup.yml`, `.flake8`, `.editorconfig`, `pyproject.toml` are taken from there with minor changes, 9 | 10 | [cookiecutter-django]: https://github.com/cookiecutter/cookiecutter-django 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | Always happy to get issues identified and pull requests! 4 | 5 | ## General considerations 6 | 7 | 1. Keep it small. The smaller the change, the more likely we are to accept. 8 | 2. Changes that fix a current issue get priority for review. 9 | 3. Check out [GitHub guide][submit-a-pr] if you've never created a pull request before. 10 | 11 | ## Getting started 12 | 13 | 1. Fork the repo 14 | 2. Clone your fork 15 | 3. 
Create a branch for your changes 16 | 17 | This last step is very important, don't start developing from master, it'll cause pain if you need to send another change later. 18 | 19 | TIP: If you're working on a GitHub issue, name your branch after the issue number, e.g. `issue-123-`. This will help us keep track of what you're working on. If there is not an issue for what you're working on, create one first please. Someone else might be working on the same thing, or we might have a reason for not wanting to do it. 20 | 21 | ## Pre-commit 22 | 23 | GitHub Actions is going to run Pre-commit hooks on your PR. If the hooks fail, you will need to fix them before your PR can be merged. It will save you a lot of time if you run the hooks locally before you push your changes. To do that, you need to install pre-commit on your local machine. 24 | 25 | ```shell 26 | pip install pre-commit 27 | ``` 28 | 29 | Once installed, you need to add the pre-commit hooks to your local repo. 30 | 31 | ```shell 32 | pre-commit install 33 | ``` 34 | 35 | Now, every time you commit, the hooks will run and check your code. If they fail, you will need to fix them before you can commit. 36 | 37 | If it happened that you committed changes already without having pre-commit hooks and do not want to reset and recommit again, you can run the following command to run the hooks on your local repo. 38 | 39 | ```shell 40 | pre-commit run --all-files 41 | ``` 42 | 43 | ## Help Us Improve This Documentation 44 | 45 | If you find that something is missing or have suggestions for improvements, please submit a PR. 46 | 47 | [submit-a-pr]: https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request 48 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | # Build as `docker build . -t localgpt`, requires BuildKit. 3 | # Run as `docker run -it --mount src="$HOME/.cache",target=/root/.cache,type=bind --gpus=all localgpt`, requires Nvidia container toolkit. 4 | 5 | FROM nvidia/cuda:11.7.1-runtime-ubuntu22.04 6 | RUN apt-get update && apt-get install -y software-properties-common 7 | RUN apt-get install -y g++-11 make python3 python-is-python3 pip 8 | # only copy what's needed at every step to optimize layer cache 9 | COPY ./requirements.txt . 10 | # use BuildKit cache mount to drastically reduce redownloading from pip on repeated builds 11 | RUN --mount=type=cache,target=/root/.cache CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --timeout 100 -r requirements.txt llama-cpp-python==0.1.83 12 | COPY SOURCE_DOCUMENTS ./SOURCE_DOCUMENTS 13 | COPY ingest.py constants.py ./ 14 | # Docker BuildKit does not support GPU during *docker build* time right now, only during *docker run*. 15 | # See . 16 | # If this changes in the future you can `docker build --build-arg device_type=cuda . -t localgpt` (+GPU argument to be determined). 17 | ARG device_type=cpu 18 | RUN --mount=type=cache,target=/root/.cache python ingest.py --device_type $device_type 19 | COPY . . 
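# The runtime device defaults to cuda via the ENV below; under standard Docker semantics it can be
# overridden when starting the container, e.g. `docker run -e device_type=cpu ... localgpt`.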
20 | ENV device_type=cuda 21 | CMD python run_localGPT.py --device_type $device_type 22 | -------------------------------------------------------------------------------- /Dockerfile_hpu: -------------------------------------------------------------------------------- 1 | FROM vault.habana.ai/gaudi-docker/1.17.0/ubuntu22.04/habanalabs/pytorch-installer-2.3.1:latest 2 | 3 | ENV HABANA_VISIBLE_DEVICES=all 4 | ENV OMPI_MCA_btl_vader_single_copy_mechanism=none 5 | ENV PT_HPU_LAZY_ACC_PAR_MODE=0 6 | ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=1 7 | 8 | # Install linux packages 9 | ENV DEBIAN_FRONTEND="noninteractive" TZ=Etc/UTC 10 | RUN apt-get update && apt-get install -y tzdata bash-completion python3-pip openssh-server \ 11 | vim git iputils-ping net-tools protobuf-compiler curl bc gawk tmux \ 12 | && rm -rf /var/lib/apt/lists/* 13 | 14 | # Add repo contents 15 | ADD localGPT /root/localGPT 16 | WORKDIR /root/localGPT 17 | 18 | # Install python packages 19 | RUN pip install --upgrade pip \ 20 | && pip install langchain-experimental==0.0.62 \ 21 | && pip install langchain==0.0.329 \ 22 | && pip install protobuf==3.20.2 \ 23 | && pip install grpcio-tools \ 24 | && pip install pymilvus==2.4.0 \ 25 | && pip install chromadb==0.5.15 \ 26 | && pip install llama-cpp-python==0.1.66 \ 27 | && pip install pdfminer.six==20221105 \ 28 | && pip install transformers==4.43.1 \ 29 | && pip install optimum[habana]==1.13.1 \ 30 | && pip install InstructorEmbedding==1.0.1 \ 31 | && pip install sentence-transformers==3.0.1 \ 32 | && pip install faiss-cpu==1.7.4 \ 33 | && pip install huggingface_hub==0.16.4 \ 34 | && pip install protobuf==3.20.2 \ 35 | && pip install auto-gptq==0.2.2 \ 36 | && pip install docx2txt unstructured unstructured[pdf] urllib3 accelerate \ 37 | && pip install bitsandbytes \ 38 | && pip install click flask requests openpyxl \ 39 | && pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.17.0 \ 40 | && pip install python-multipart \ 41 | && pip install fastapi \ 42 | && pip install uvicorn \ 43 | && pip install gptcache==0.1.43 \ 44 | && pip install pypdf==4.3.1 \ 45 | && pip install python-jose[cryptography] 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LocalGPT: Secure, Local Conversations with Your Documents 🌐 2 | 3 |

4 | [Trendshift badge: PromtEngineer/localGPT] 5 |

6 | 7 | [![GitHub Stars](https://img.shields.io/github/stars/PromtEngineer/localGPT?style=social)](https://github.com/PromtEngineer/localGPT/stargazers) 8 | [![GitHub Forks](https://img.shields.io/github/forks/PromtEngineer/localGPT?style=social)](https://github.com/PromtEngineer/localGPT/network/members) 9 | [![GitHub Issues](https://img.shields.io/github/issues/PromtEngineer/localGPT)](https://github.com/PromtEngineer/localGPT/issues) 10 | [![GitHub Pull Requests](https://img.shields.io/github/issues-pr/PromtEngineer/localGPT)](https://github.com/PromtEngineer/localGPT/pulls) 11 | [![License](https://img.shields.io/github/license/PromtEngineer/localGPT)](https://github.com/PromtEngineer/localGPT/blob/main/LICENSE) 12 | 13 | 🚨🚨 You can run localGPT on a pre-configured [Virtual Machine](https://bit.ly/localGPT). Make sure to use the code: PromptEngineering to get 50% off. I will get a small commission! 14 | 15 | **LocalGPT** is an open-source initiative that allows you to converse with your documents without compromising your privacy. With everything running locally, you can be assured that no data ever leaves your computer. Dive into the world of secure, local document interactions with LocalGPT. 16 | 17 | ## Features 🌟 18 | - **Utmost Privacy**: Your data remains on your computer, ensuring 100% security. 19 | - **Versatile Model Support**: Seamlessly integrate a variety of open-source models, including HF, GPTQ, GGML, and GGUF. 20 | - **Diverse Embeddings**: Choose from a range of open-source embeddings. 21 | - **Reuse Your LLM**: Once downloaded, reuse your LLM without the need for repeated downloads. 22 | - **Chat History**: Remembers your previous conversations (in a session). 23 | - **API**: LocalGPT has an API that you can use for building RAG applications. 24 | - **Graphical Interface**: LocalGPT comes with two GUIs: one uses the API and the other is standalone (based on Streamlit). 25 | - **GPU, CPU, HPU & MPS Support**: Supports multiple platforms out of the box. Chat with your data using `CUDA`, `CPU`, `HPU (Intel® Gaudi®)`, `MPS`, and more! 26 | 27 | ## Dive Deeper with Our Videos 🎥 28 | - [Detailed code-walkthrough](https://youtu.be/MlyoObdIHyo) 29 | - [Llama-2 with LocalGPT](https://youtu.be/lbFmceo4D5E) 30 | - [Adding Chat History](https://youtu.be/d7otIM_MCZs) 31 | - [LocalGPT - Updated (09/17/2023)](https://youtu.be/G_prHSKX9d4) 32 | 33 | ## Technical Details 🛠️ 34 | By selecting the right local models and leveraging the power of `LangChain`, you can run the entire RAG pipeline locally, without any data leaving your environment, and with reasonable performance. 35 | 36 | - `ingest.py` uses `LangChain` tools to parse the documents and create embeddings locally using `InstructorEmbeddings`. It then stores the result in a local vector database using the `Chroma` vector store. 37 | - `run_localGPT.py` uses a local LLM to understand questions and create answers. The context for the answers is extracted from the local vector store using a similarity search to locate the right piece of context from the docs (a minimal sketch of this flow follows below). 38 | - You can replace this local LLM with any other LLM from HuggingFace. Just make sure the LLM you select is in the HF format. 39 | 40 | This project was inspired by the original [privateGPT](https://github.com/imartinez/privateGPT).
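The flow above can be condensed into a small sketch. This is not the project's actual code: it assumes the classic `langchain` API that this repo imports elsewhere (`PDFMinerLoader`, `HuggingFaceInstructEmbeddings`, `Chroma`), reuses the defaults from `constants.py` (`hkunlp/instructor-large`, the `DB` folder), picks illustrative chunk sizes, and uses a placeholder `generate()` in place of the local LLM that `run_localGPT.py` / `load_models.py` load.

```python
# Minimal sketch of the ingest-and-query flow (assumptions noted above; not the repo's actual code).
from langchain.document_loaders import PDFMinerLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Ingest: parse a document, split it into chunks, embed locally, and persist to Chroma.
docs = PDFMinerLoader("SOURCE_DOCUMENTS/Orca_paper.pdf").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
db = Chroma.from_documents(chunks, embeddings, persist_directory="DB")

# Query: retrieve the most similar chunks and pass them to a local LLM as context.
question = "What is the Orca paper about?"
context = "\n\n".join(doc.page_content for doc in db.similarity_search(question, k=4))
prompt = f"Answer using only this context:\n{context}\n\nQuestion: {question}"
# answer = generate(prompt)  # placeholder for the local LLM loaded in run_localGPT.py / load_models.py
```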
41 | 42 | ## Built Using 🧩 43 | - [LangChain](https://github.com/hwchase17/langchain) 44 | - [HuggingFace LLMs](https://huggingface.co/models) 45 | - [InstructorEmbeddings](https://instructor-embedding.github.io/) 46 | - [LLAMACPP](https://github.com/abetlen/llama-cpp-python) 47 | - [ChromaDB](https://www.trychroma.com/) 48 | - [Streamlit](https://streamlit.io/) 49 | 50 | # Environment Setup 🌍 51 | 52 | 1. 📥 Clone the repo using git: 53 | 54 | ```shell 55 | git clone https://github.com/PromtEngineer/localGPT.git 56 | ``` 57 | 58 | 2. 🐍 Install [conda](https://www.anaconda.com/download) for virtual environment management. Create and activate a new virtual environment. 59 | 60 | ```shell 61 | conda create -n localGPT python=3.10.0 62 | conda activate localGPT 63 | ``` 64 | 65 | 3. 🛠️ Install the dependencies using pip 66 | 67 | To set up your environment to run the code, first install all requirements: 68 | 69 | ```shell 70 | pip install -r requirements.txt 71 | ``` 72 | 73 | ***Installing LLAMA-CPP :*** 74 | 75 | LocalGPT uses [LlamaCpp-Python](https://github.com/abetlen/llama-cpp-python) for GGML (you will need llama-cpp-python <=0.1.76) and GGUF (llama-cpp-python >=0.1.83) models. 76 | 77 | To run the quantized Llama3 model, ensure you have llama-cpp-python version 0.2.62 or higher installed. 78 | 79 | If you want to use BLAS or Metal with [llama-cpp](https://github.com/abetlen/llama-cpp-python#installation-with-openblas--cublas--clblast--metal) you can set appropriate flags: 80 | 81 | For `NVIDIA` GPUs support, use `cuBLAS` 82 | 83 | ```shell 84 | # Example: cuBLAS 85 | CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir 86 | ``` 87 | 88 | For Apple Metal (`M1/M2`) support, use 89 | 90 | ```shell 91 | # Example: METAL 92 | CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir 93 | ``` 94 | For more details, please refer to [llama-cpp](https://github.com/abetlen/llama-cpp-python#installation-with-openblas--cublas--clblast--metal) 95 | 96 | ## Docker 🐳 97 | 98 | Installing the required packages for GPU inference on NVIDIA GPUs, like gcc 11 and CUDA 11, may cause conflicts with other packages in your system. 99 | As an alternative to Conda, you can use Docker with the provided Dockerfile. 100 | It includes CUDA, your system just needs Docker, BuildKit, your NVIDIA GPU driver and the NVIDIA container toolkit. 101 | Build as `docker build -t localgpt .`, requires BuildKit. 102 | Docker BuildKit does not support GPU during *docker build* time right now, only during *docker run*. 103 | Run as `docker run -it --mount src="$HOME/.cache",target=/root/.cache,type=bind --gpus=all localgpt`. 104 | For running the code on Intel® Gaudi® HPU, use the following Dockerfile - `Dockerfile_hpu`. 105 | 106 | ## Test dataset 107 | 108 | For testing, this repository comes with [Constitution of USA](https://constitutioncenter.org/media/files/constitution.pdf) as an example file to use. 109 | 110 | ## Ingesting your OWN Data. 111 | Put your files in the `SOURCE_DOCUMENTS` folder. You can put multiple folders within the `SOURCE_DOCUMENTS` folder and the code will recursively read your files. 112 | 113 | ### Support file formats: 114 | LocalGPT currently supports the following file formats. LocalGPT uses `LangChain` for loading these file formats. The code in `constants.py` uses a `DOCUMENT_MAP` dictionary to map a file format to the corresponding loader. 
To add support for another file format, simply add the file format and the corresponding loader from [LangChain](https://python.langchain.com/docs/modules/data_connection/document_loaders/) to this dictionary. 115 | 116 | ```python 117 | DOCUMENT_MAP = { 118 | ".txt": TextLoader, 119 | ".md": TextLoader, 120 | ".py": TextLoader, 121 | ".pdf": PDFMinerLoader, 122 | ".csv": CSVLoader, 123 | ".xls": UnstructuredExcelLoader, 124 | ".xlsx": UnstructuredExcelLoader, 125 | ".docx": Docx2txtLoader, 126 | ".doc": Docx2txtLoader, 127 | } 128 | ``` 129 | 130 | ### Ingest 131 | 132 | Run the following command to ingest all the data. 133 | 134 | If you have `cuda` set up on your system: 135 | 136 | ```shell 137 | python ingest.py 138 | ``` 139 | You will see an output like this: 140 | [Screenshot: `ingest.py` output] 141 | 142 | 143 | Use the device type argument to specify a given device. 144 | To run on `cpu`: 145 | 146 | ```sh 147 | python ingest.py --device_type cpu 148 | ``` 149 | 150 | To run on `M1/M2`: 151 | 152 | ```sh 153 | python ingest.py --device_type mps 154 | ``` 155 | 156 | Use the `--help` flag for a full list of supported devices. 157 | 158 | ```sh 159 | python ingest.py --help 160 | ``` 161 | 162 | This will create a new folder called `DB` and use it for the newly created vector store. You can ingest as many documents as you want, and all will be accumulated in the local embeddings database. 163 | If you want to start from an empty database, delete the `DB` folder and reingest your documents. 164 | 165 | Note: When you run this for the first time, it will need internet access to download the embedding model (default: `Instructor Embedding`). In subsequent runs, no data will leave your local environment, and you can ingest data without an internet connection. 166 | 167 | ## Ask questions to your documents, locally! 168 | 169 | In order to chat with your documents, run the following command (by default, it will run on `cuda`). 170 | 171 | ```shell 172 | python run_localGPT.py 173 | ``` 174 | You can also specify the device type just like `ingest.py`: 175 | 176 | ```shell 177 | python run_localGPT.py --device_type mps # to run on Apple silicon 178 | ``` 179 | 180 | ```shell 181 | # To run on Intel® Gaudi® hpu 182 | MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2" # in constants.py 183 | python run_localGPT.py --device_type hpu 184 | ``` 185 | 186 | This will load the ingested vector store and embedding model. You will be presented with a prompt: 187 | 188 | ```shell 189 | > Enter a query: 190 | ``` 191 | 192 | After typing your question, hit enter. LocalGPT will take some time depending on your hardware. You will get a response like the one below. 193 | [Screenshot: example response from `run_localGPT.py`] 194 | 195 | Once the answer is generated, you can ask another question without re-running the script; just wait for the prompt again. 196 | 197 | 198 | ***Note:*** When you run this for the first time, it will need an internet connection to download the LLM (default: `TheBloke/Llama-2-7b-Chat-GGUF`). After that, you can turn off your internet connection and the script will still run inference. No data leaves your local environment. 199 | 200 | Type `exit` to finish the script. 201 | 202 | ### Extra Options with run_localGPT.py 203 | 204 | You can use the `--show_sources` flag with `run_localGPT.py` to show which chunks were retrieved by the embedding model. By default, it will show 4 different sources/chunks.
You can change the number of sources/chunks in the code if needed. 205 | 206 | ```shell 207 | python run_localGPT.py --show_sources 208 | ``` 209 | 210 | Another option is to enable chat history. ***Note***: This is disabled by default and can be enabled with the `--use_history` flag. Keep in mind that the context window is limited, so enabling history consumes part of it and may cause it to overflow. 211 | 212 | ```shell 213 | python run_localGPT.py --use_history 214 | ``` 215 | 216 | You can store user questions and model responses in a CSV file, `/local_chat_history/qa_log.csv`, by using the `--save_qa` flag. Every interaction will be stored. 217 | 218 | ```shell 219 | python run_localGPT.py --save_qa 220 | ``` 221 | 222 | # Run the Graphical User Interface 223 | 224 | 1. Open `constants.py` in an editor of your choice and set the LLM you want to use. By default, the following model will be used: 225 | 226 | ```python 227 | MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF" 228 | MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf" 229 | ``` 230 | 231 | 2. Open up a terminal and activate your python environment that contains the dependencies installed from requirements.txt. 232 | 233 | 3. Navigate to the `/LOCALGPT` directory. 234 | 235 | 4. Run the following command `python run_localGPT_API.py`. The API should begin to run. 236 | 237 | 5. Wait until everything has loaded. You should see something like `INFO:werkzeug:Press CTRL+C to quit`. 238 | 239 | 6. Open up a second terminal and activate the same python environment. 240 | 241 | 7. Navigate to the `/LOCALGPT/localGPTUI` directory. 242 | 243 | 8. Run the command `python localGPTUI.py`. 244 | 245 | 9. Open up a web browser and go to the address `http://localhost:5111/`. 246 | 247 | 248 | # How to select different LLM models? 249 | 250 | To change the model, you will need to set both `MODEL_ID` and `MODEL_BASENAME`. 251 | 252 | 1. Open up `constants.py` in the editor of your choice. 253 | 2. Change the `MODEL_ID` and `MODEL_BASENAME`. If you are using a quantized model (`GGML`, `GPTQ`, `GGUF`), you will need to provide `MODEL_BASENAME`. For unquantized models, set `MODEL_BASENAME` to `NONE`. 254 | 3. There are a number of example models from HuggingFace that have already been tested, both original trained models (ending with HF or with a .bin file in their "Files and versions") and quantized models (ending with GPTQ or with a .no-act-order or .safetensors file in their "Files and versions"). 255 | 256 | 4. For models that end with HF or have a .bin file inside their "Files and versions" on their HuggingFace page: 257 | 258 | - Make sure you have a `MODEL_ID` selected. For example -> `MODEL_ID = "TheBloke/guanaco-7B-HF"` 259 | - Go to the [HuggingFace Repo](https://huggingface.co/TheBloke/guanaco-7B-HF) 260 | 261 | 5. For models that contain GPTQ in their name and/or have a .no-act-order or .safetensors extension inside their "Files and versions" on their HuggingFace page: 262 | 263 | - Make sure you have a `MODEL_ID` selected. For example -> model_id = `"TheBloke/wizardLM-7B-GPTQ"` 264 | - Go to the corresponding [HuggingFace Repo](https://huggingface.co/TheBloke/wizardLM-7B-GPTQ) and select "Files and versions". 265 | - Pick one of the model names and set it as `MODEL_BASENAME`. For example -> `MODEL_BASENAME = "wizardLM-7B-GPTQ-4bit.compat.no-act-order.safetensors"` 266 | 267 | 6. Follow the same steps for `GGUF` and `GGML` models. 268 | 269 | # GPU and VRAM Requirements 270 | 271 | Below are the VRAM requirements for different models depending on their size (billions of parameters).
The estimates in the table do not include the VRAM used by the embedding models, which use an additional 2 GB - 7 GB of VRAM depending on the model. 272 | 273 | | Model Size (B) | float32 | float16 | GPTQ 8bit | GPTQ 4bit | 274 | | ------- | --------- | --------- | -------------- | ------------------ | 275 | | 7B | 28 GB | 14 GB | 7 GB - 9 GB | 3.5 GB - 5 GB | 276 | | 13B | 52 GB | 26 GB | 13 GB - 15 GB | 6.5 GB - 8 GB | 277 | | 32B | 130 GB | 65 GB | 32.5 GB - 35 GB | 16.25 GB - 19 GB | 278 | | 65B | 260.8 GB | 130.4 GB | 65.2 GB - 67 GB | 32.6 GB - 35 GB | 279 | 280 | 281 | # System Requirements 282 | 283 | ## Python Version 284 | 285 | To use this software, you must have Python 3.10 or later installed. Earlier versions of Python will not work. 286 | 287 | ## C++ Compiler 288 | 289 | If you encounter an error while building a wheel during the `pip install` process, you may need to install a C++ compiler on your computer. 290 | 291 | ### For Windows 10/11 292 | 293 | To install a C++ compiler on Windows 10/11, follow these steps: 294 | 295 | 1. Install Visual Studio 2022. 296 | 2. Make sure the following components are selected: 297 | - Universal Windows Platform development 298 | - C++ CMake tools for Windows 299 | 3. Download the MinGW installer from the [MinGW website](https://sourceforge.net/projects/mingw/). 300 | 4. Run the installer and select the "gcc" component. 301 | 302 | ### NVIDIA Driver Issues 303 | 304 | Follow this [page](https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-22-04) to install NVIDIA drivers. 305 | 306 | ## Star History 307 | 308 | [![Star History Chart](https://api.star-history.com/svg?repos=PromtEngineer/localGPT&type=Date)](https://star-history.com/#PromtEngineer/localGPT&Date) 309 | 310 | # Disclaimer 311 | 312 | This is a test project to validate the feasibility of a fully local solution for question answering using LLMs and vector embeddings. It is not production ready, and it is not meant to be used in production. Vicuna-7B is based on the Llama model, so it has the original Llama license.
312 | 313 | # Common Errors 314 | 315 | - [Torch not compatible with CUDA enabled](https://github.com/pytorch/pytorch/issues/30664) 316 | 317 | - Get CUDA version 318 | ```shell 319 | nvcc --version 320 | ``` 321 | ```shell 322 | nvidia-smi 323 | ``` 324 | - Try installing PyTorch depending on your CUDA version 325 | ```shell 326 | conda install -c pytorch torchvision cudatoolkit=10.1 pytorch 327 | ``` 328 | - If it doesn't work, try reinstalling 329 | ```shell 330 | pip uninstall torch 331 | pip cache purge 332 | pip install torch -f https://download.pytorch.org/whl/torch_stable.html 333 | ``` 334 | 335 | - [ERROR: pip's dependency resolver does not currently take into account all the packages that are installed](https://stackoverflow.com/questions/72672196/error-pips-dependency-resolver-does-not-currently-take-into-account-all-the-pa/76604141#76604141) 336 | ```shell 337 | pip install h5py 338 | pip install typing-extensions 339 | pip install wheel 340 | ``` 341 | - [Failed to import transformers](https://github.com/huggingface/transformers/issues/11262) 342 | - Try re-install 343 | ```shell 344 | conda uninstall tokenizers, transformers 345 | pip install transformers 346 | ``` 347 | -------------------------------------------------------------------------------- /SOURCE_DOCUMENTS/Orca_paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PromtEngineer/localGPT/4e0d9e75e9340fbd9d8f7d75cfdb7c5c9fbb0df4/SOURCE_DOCUMENTS/Orca_paper.pdf -------------------------------------------------------------------------------- /constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # from dotenv import load_dotenv 4 | from chromadb.config import Settings 5 | 6 | # https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel 7 | from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader 8 | from langchain.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader 9 | from langchain.document_loaders import UnstructuredHTMLLoader 10 | 11 | 12 | # load_dotenv() 13 | ROOT_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) 14 | 15 | # Define the folder for storing database 16 | SOURCE_DIRECTORY = f"{ROOT_DIRECTORY}/SOURCE_DOCUMENTS" 17 | 18 | PERSIST_DIRECTORY = f"{ROOT_DIRECTORY}/DB" 19 | 20 | MODELS_PATH = "./models" 21 | 22 | # Can be changed to a specific number 23 | INGEST_THREADS = os.cpu_count() or 8 24 | 25 | # Define the Chroma settings 26 | CHROMA_SETTINGS = Settings( 27 | anonymized_telemetry=False, 28 | is_persistent=True, 29 | ) 30 | 31 | # Context Window and Max New Tokens 32 | CONTEXT_WINDOW_SIZE = 8096 33 | MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4) 34 | 35 | #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing 36 | 37 | N_GPU_LAYERS = 100 # Llama-2-70B has 83 layers 38 | N_BATCH = 512 39 | 40 | ### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work: 41 | # N_GPU_LAYERS = 20 42 | # N_BATCH = 512 43 | 44 | 45 | # https://python.langchain.com/en/latest/_modules/langchain/document_loaders/excel.html#UnstructuredExcelLoader 46 | DOCUMENT_MAP = { 47 | ".html": UnstructuredHTMLLoader, 48 | ".txt": TextLoader, 49 | ".md": 
UnstructuredMarkdownLoader, 50 | ".py": TextLoader, 51 | # ".pdf": PDFMinerLoader, 52 | ".pdf": UnstructuredFileLoader, 53 | ".csv": CSVLoader, 54 | ".xls": UnstructuredExcelLoader, 55 | ".xlsx": UnstructuredExcelLoader, 56 | ".docx": Docx2txtLoader, 57 | ".doc": Docx2txtLoader, 58 | } 59 | 60 | # Default Instructor Model 61 | EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Accuracy with lower VRAM usage) 62 | 63 | #### 64 | #### OTHER EMBEDDING MODEL OPTIONS 65 | #### 66 | 67 | # EMBEDDING_MODEL_NAME = "hkunlp/instructor-xl" # Uses 5 GB of VRAM (Most Accurate of all models) 68 | # EMBEDDING_MODEL_NAME = "intfloat/e5-large-v2" # Uses 1.5 GB of VRAM (A little less accurate than instructor-large) 69 | # EMBEDDING_MODEL_NAME = "intfloat/e5-base-v2" # Uses 0.5 GB of VRAM (A good model for lower VRAM GPUs) 70 | # EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2" # Uses 0.2 GB of VRAM (Less accurate but fastest - only requires 150mb of vram) 71 | 72 | #### 73 | #### MULTILINGUAL EMBEDDING MODELS 74 | #### 75 | 76 | # EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-large" # Uses 2.5 GB of VRAM 77 | # EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-base" # Uses 1.2 GB of VRAM 78 | 79 | 80 | #### SELECT AN OPEN SOURCE LLM (LARGE LANGUAGE MODEL) 81 | # Select the Model ID and model_basename 82 | # load the LLM for generating Natural Language responses 83 | 84 | #### GPU VRAM Memory required for LLM Models (ONLY) by Billion Parameter value (B Model) 85 | #### Does not include VRAM used by Embedding Models - which use an additional 2GB-7GB of VRAM depending on the model. 86 | #### 87 | #### (B Model) (float32) (float16) (GPTQ 8bit) (GPTQ 4bit) 88 | #### 7b 28 GB 14 GB 7 GB - 9 GB 3.5 GB - 5 GB 89 | #### 13b 52 GB 26 GB 13 GB - 15 GB 6.5 GB - 8 GB 90 | #### 32b 130 GB 65 GB 32.5 GB - 35 GB 16.25 GB - 19 GB 91 | #### 65b 260.8 GB 130.4 GB 65.2 GB - 67 GB 32.6 GB - - 35 GB 92 | 93 | # MODEL_ID = "TheBloke/Llama-2-7B-Chat-GGML" 94 | # MODEL_BASENAME = "llama-2-7b-chat.ggmlv3.q4_0.bin" 95 | 96 | #### 97 | #### (FOR GGUF MODELS) 98 | #### 99 | 100 | # MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF" 101 | # MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf" 102 | 103 | # MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF" 104 | # MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf" 105 | 106 | # MODEL_ID = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF" 107 | # MODEL_BASENAME = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf" 108 | 109 | # Use mistral to run on hpu 110 | # MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2" 111 | 112 | # LLAMA 3 # use for Apple Silicon 113 | MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct" 114 | MODEL_BASENAME = None 115 | 116 | # LLAMA 3 # use for NVIDIA GPUs 117 | # MODEL_ID = "unsloth/llama-3-8b-bnb-4bit" 118 | # MODEL_BASENAME = None 119 | 120 | # MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF" 121 | # MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf" 122 | 123 | # MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF" 124 | # MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf" 125 | 126 | #### 127 | #### (FOR HF MODELS) 128 | #### 129 | 130 | # MODEL_ID = "NousResearch/Llama-2-7b-chat-hf" 131 | # MODEL_BASENAME = None 132 | # MODEL_ID = "TheBloke/vicuna-7B-1.1-HF" 133 | # MODEL_BASENAME = None 134 | # MODEL_ID = "TheBloke/Wizard-Vicuna-7B-Uncensored-HF" 135 | # MODEL_ID = "TheBloke/guanaco-7B-HF" 136 | # MODEL_ID = 'NousResearch/Nous-Hermes-13b' # Requires ~ 23GB VRAM. Using STransformers 137 | # alongside will 100% create OOM on 24GB cards. 
138 | # llm = load_model(device_type, model_id=model_id) 139 | 140 | #### 141 | #### (FOR GPTQ QUANTIZED) Select a llm model based on your GPU and VRAM GB. Does not include Embedding Models VRAM usage. 142 | #### 143 | 144 | ##### 48GB VRAM Graphics Cards (RTX 6000, RTX A6000 and other 48GB VRAM GPUs) ##### 145 | 146 | ### 65b GPTQ LLM Models for 48GB GPUs (*** With best embedding model: hkunlp/instructor-xl ***) 147 | # MODEL_ID = "TheBloke/guanaco-65B-GPTQ" 148 | # MODEL_BASENAME = "model.safetensors" 149 | # MODEL_ID = "TheBloke/Airoboros-65B-GPT4-2.0-GPTQ" 150 | # MODEL_BASENAME = "model.safetensors" 151 | # MODEL_ID = "TheBloke/gpt4-alpaca-lora_mlp-65B-GPTQ" 152 | # MODEL_BASENAME = "model.safetensors" 153 | # MODEL_ID = "TheBloke/Upstage-Llama1-65B-Instruct-GPTQ" 154 | # MODEL_BASENAME = "model.safetensors" 155 | 156 | ##### 24GB VRAM Graphics Cards (RTX 3090 - RTX 4090 (35% Faster) - RTX A5000 - RTX A5500) ##### 157 | 158 | ### 13b GPTQ Models for 24GB GPUs (*** With best embedding model: hkunlp/instructor-xl ***) 159 | # MODEL_ID = "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ" 160 | # MODEL_BASENAME = "Wizard-Vicuna-13B-Uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors" 161 | # MODEL_ID = "TheBloke/vicuna-13B-v1.5-GPTQ" 162 | # MODEL_BASENAME = "model.safetensors" 163 | # MODEL_ID = "TheBloke/Nous-Hermes-13B-GPTQ" 164 | # MODEL_BASENAME = "nous-hermes-13b-GPTQ-4bit-128g.no-act.order" 165 | # MODEL_ID = "TheBloke/WizardLM-13B-V1.2-GPTQ" 166 | # MODEL_BASENAME = "gptq_model-4bit-128g.safetensors 167 | 168 | ### 30b GPTQ Models for 24GB GPUs (*** Requires using intfloat/e5-base-v2 instead of hkunlp/instructor-large as embedding model ***) 169 | # MODEL_ID = "TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ" 170 | # MODEL_BASENAME = "Wizard-Vicuna-30B-Uncensored-GPTQ-4bit--1g.act.order.safetensors" 171 | # MODEL_ID = "TheBloke/WizardLM-30B-Uncensored-GPTQ" 172 | # MODEL_BASENAME = "WizardLM-30B-Uncensored-GPTQ-4bit.act-order.safetensors" 173 | 174 | ##### 8-10GB VRAM Graphics Cards (RTX 3080 - RTX 3080 Ti - RTX 3070 Ti - 3060 Ti - RTX 2000 Series, Quadro RTX 4000, 5000, 6000) ##### 175 | ### (*** Requires using intfloat/e5-small-v2 instead of hkunlp/instructor-large as embedding model ***) 176 | 177 | ### 7b GPTQ Models for 8GB GPUs 178 | # MODEL_ID = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ" 179 | # MODEL_BASENAME = "Wizard-Vicuna-7B-Uncensored-GPTQ-4bit-128g.no-act.order.safetensors" 180 | # MODEL_ID = "TheBloke/WizardLM-7B-uncensored-GPTQ" 181 | # MODEL_BASENAME = "WizardLM-7B-uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors" 182 | # MODEL_ID = "TheBloke/wizardLM-7B-GPTQ" 183 | # MODEL_BASENAME = "wizardLM-7B-GPTQ-4bit.compat.no-act-order.safetensors" 184 | 185 | #### 186 | #### (FOR GGML) (Quantized cpu+gpu+mps) models - check if they support llama.cpp 187 | #### 188 | 189 | # MODEL_ID = "TheBloke/wizard-vicuna-13B-GGML" 190 | # MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q4_0.bin" 191 | # MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q6_K.bin" 192 | # MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q2_K.bin" 193 | # MODEL_ID = "TheBloke/orca_mini_3B-GGML" 194 | # MODEL_BASENAME = "orca-mini-3b.ggmlv3.q4_0.bin" 195 | 196 | #### 197 | #### (FOR AWQ QUANTIZED) Select a llm model based on your GPU and VRAM GB. Does not include Embedding Models VRAM usage. 
198 | ### (*** MODEL_BASENAME is not actually used but have to contain .awq so the correct model loading is used ***) 199 | ### (*** Compute capability 7.5 (sm75) and CUDA Toolkit 11.8+ are required ***) 200 | #### 201 | # MODEL_ID = "TheBloke/Llama-2-7B-Chat-AWQ" 202 | # MODEL_BASENAME = "model.safetensors.awq" 203 | -------------------------------------------------------------------------------- /crawl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import click 4 | import subprocess 5 | 6 | from constants import ( 7 | DOCUMENT_MAP, 8 | SOURCE_DIRECTORY 9 | ) 10 | 11 | def logToFile(logentry): 12 | file1 = open("crawl.log","a") 13 | file1.write(logentry + "\n") 14 | file1.close() 15 | print(logentry + "\n") 16 | 17 | @click.command() 18 | @click.option( 19 | "--device_type", 20 | default="cuda", 21 | type=click.Choice( 22 | [ 23 | "cpu", 24 | "cuda", 25 | "ipu", 26 | "xpu", 27 | "mkldnn", 28 | "opengl", 29 | "opencl", 30 | "ideep", 31 | "hip", 32 | "ve", 33 | "fpga", 34 | "ort", 35 | "xla", 36 | "lazy", 37 | "vulkan", 38 | "mps", 39 | "meta", 40 | "hpu", 41 | "mtia", 42 | ], 43 | ), 44 | help="Device to run on. (Default is cuda)", 45 | ) 46 | @click.option( 47 | "--landing_directory", 48 | default="./LANDING_DOCUMENTS" 49 | ) 50 | @click.option( 51 | "--processed_directory", 52 | default="./PROCESSED_DOCUMENTS" 53 | ) 54 | @click.option( 55 | "--error_directory", 56 | default="./ERROR_DOCUMENTS" 57 | ) 58 | @click.option( 59 | "--unsupported_directory", 60 | default="./UNSUPPORTED_DOCUMENTS" 61 | ) 62 | 63 | def main(device_type, landing_directory, processed_directory, error_directory, unsupported_directory): 64 | paths = [] 65 | 66 | os.makedirs(processed_directory, exist_ok=True) 67 | os.makedirs(error_directory, exist_ok=True) 68 | os.makedirs(unsupported_directory, exist_ok=True) 69 | 70 | for root, _, files in os.walk(landing_directory): 71 | for file_name in files: 72 | file_extension = os.path.splitext(file_name)[1] 73 | short_filename = os.path.basename(file_name) 74 | 75 | if not os.path.isdir(root + "/" + file_name): 76 | if file_extension in DOCUMENT_MAP.keys(): 77 | shutil.move(root + "/" + file_name, SOURCE_DIRECTORY+ "/" + short_filename) 78 | logToFile("START: " + root + "/" + short_filename) 79 | process = subprocess.Popen("python ingest.py --device_type=" + device_type, shell=True, stdout=subprocess.PIPE) 80 | process.wait() 81 | if process.returncode > 0: 82 | shutil.move(SOURCE_DIRECTORY + "/" + short_filename, error_directory + "/" + short_filename) 83 | logToFile("ERROR: " + root + "/" + short_filename) 84 | else: 85 | logToFile("VALID: " + root + "/" + short_filename) 86 | shutil.move(SOURCE_DIRECTORY + "/" + short_filename, processed_directory+ "/" + short_filename) 87 | else: 88 | shutil.move(root + "/" + file_name, unsupported_directory+ "/" + short_filename) 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /gaudi_utils/embeddings.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | 4 | from langchain.embeddings import HuggingFaceEmbeddings 5 | from habana_frameworks.torch.utils.library_loader import load_habana_module 6 | from optimum.habana.sentence_transformers.modeling_utils import ( 7 | adapt_sentence_transformers_to_gaudi, 8 | ) 9 | 10 | from constants import EMBEDDING_MODEL_NAME 11 | 12 | 13 | def load_embeddings(): 14 | """Load 
HuggingFace Embeddings object onto Gaudi or CPU""" 15 | load_habana_module() 16 | if torch.hpu.is_available(): 17 | logging.info("Loading embedding model on hpu") 18 | 19 | adapt_sentence_transformers_to_gaudi() 20 | embeddings = HuggingFaceEmbeddings( 21 | model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": "hpu"} 22 | ) 23 | else: 24 | logging.info("Loading embedding model on cpu") 25 | embeddings = HuggingFaceEmbeddings( 26 | model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": "cpu"} 27 | ) 28 | return embeddings 29 | 30 | 31 | def calculate_similarity(model, response, expected_answer): 32 | """Calculate similarity between response and expected answer using the model""" 33 | response_embedding = model.client.encode(response, convert_to_tensor=True).squeeze() 34 | expected_embedding = model.client.encode( 35 | expected_answer, convert_to_tensor=True 36 | ).squeeze() 37 | similarity_score = torch.nn.functional.cosine_similarity( 38 | response_embedding, expected_embedding, dim=0 39 | ) 40 | return similarity_score.item() 41 | -------------------------------------------------------------------------------- /gaudi_utils/pipeline.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | import torch 4 | from pathlib import Path 5 | from typing import List 6 | 7 | import habana_frameworks.torch.hpu as torch_hpu 8 | 9 | from habana_frameworks.torch.hpu import wrap_in_hpu_graph 10 | from huggingface_hub import snapshot_download 11 | from optimum.habana.transformers.generation import MODELS_OPTIMIZED_WITH_STATIC_SHAPES 12 | from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi 13 | from optimum.habana.utils import set_seed 14 | from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline 15 | from transformers.utils import is_offline_mode 16 | 17 | 18 | def get_repo_root(model_name_or_path, local_rank=-1, token=None): 19 | """ 20 | Downloads the specified model checkpoint and returns the repository where it was downloaded. 
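    If model_name_or_path is a local directory it is returned as-is. Otherwise only the
    "*.bin" weight files are fetched with snapshot_download into TRANSFORMERS_CACHE (or the
    default HF cache); in multi-process runs rank 0 downloads first and the other ranks wait
    on torch.distributed.barrier() before reading the cached snapshot.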
21 | """ 22 | if Path(model_name_or_path).is_dir(): 23 | # If it is a local model, no need to download anything 24 | return model_name_or_path 25 | else: 26 | # Checks if online or not 27 | if is_offline_mode(): 28 | if local_rank == 0: 29 | print("Offline mode: forcing local_files_only=True") 30 | 31 | # Only download PyTorch weights by default 32 | allow_patterns = ["*.bin"] 33 | 34 | # Download only on first process 35 | if local_rank in [-1, 0]: 36 | cache_dir = snapshot_download( 37 | model_name_or_path, 38 | local_files_only=is_offline_mode(), 39 | cache_dir=os.getenv("TRANSFORMERS_CACHE", None), 40 | allow_patterns=allow_patterns, 41 | max_workers=16, 42 | token=token, 43 | ) 44 | if local_rank == -1: 45 | # If there is only one process, then the method is finished 46 | return cache_dir 47 | 48 | # Make all processes wait so that other processes can get the checkpoint directly from cache 49 | torch.distributed.barrier() 50 | 51 | return snapshot_download( 52 | model_name_or_path, 53 | local_files_only=is_offline_mode(), 54 | cache_dir=os.getenv("TRANSFORMERS_CACHE", None), 55 | allow_patterns=allow_patterns, 56 | token=token, 57 | ) 58 | 59 | 60 | def get_optimized_model_name(config): 61 | for model_type in MODELS_OPTIMIZED_WITH_STATIC_SHAPES: 62 | if model_type == config.model_type: 63 | return model_type 64 | 65 | return None 66 | 67 | 68 | def model_is_optimized(config): 69 | """ 70 | Checks if the given config belongs to a model in optimum/habana/transformers/models, which has a 71 | new input token_idx. 72 | """ 73 | return get_optimized_model_name(config) is not None 74 | 75 | 76 | class GaudiTextGenerationPipeline(TextGenerationPipeline): 77 | """ 78 | An end-to-end text-generation pipeline that can used to initialize LangChain classes. 
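    Usage sketch (illustrative only; the model id and generation settings below are
    assumptions, not values taken from this repository):

        pipe = GaudiTextGenerationPipeline(
            model_name_or_path="meta-llama/Llama-2-7b-chat-hf",
            max_new_tokens=256,
            do_sample=True,
            temperature=0.2,
        )
        result = pipe(["What does this pipeline run on?"])
        print(result[0]["generated_text"])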
79 | """ 80 | def __init__(self, model_name_or_path=None, revision="main", **kwargs): 81 | self.task = "text-generation" 82 | self.device = "hpu" 83 | 84 | # Tweak generation so that it runs faster on Gaudi 85 | adapt_transformers_to_gaudi() 86 | set_seed(27) 87 | 88 | # Initialize tokenizer and define datatype 89 | self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, revision=revision) 90 | model_dtype = torch.bfloat16 91 | 92 | # Intialize model 93 | get_repo_root(model_name_or_path) 94 | model = AutoModelForCausalLM.from_pretrained(model_name_or_path, revision=revision, torch_dtype=model_dtype) 95 | model = model.eval().to(self.device) 96 | is_optimized = model_is_optimized(model.config) 97 | model = wrap_in_hpu_graph(model) 98 | self.model = model 99 | 100 | # Used for padding input to fixed length 101 | self.tokenizer.padding_side = "left" 102 | self.max_padding_length = kwargs.get("max_padding_length", self.model.config.max_position_embeddings) 103 | 104 | # Define config params for llama and mistral models 105 | if self.model.config.model_type in ["llama", "mistral"]: 106 | self.model.generation_config.pad_token_id = 0 107 | self.model.generation_config.bos_token_id = 1 108 | self.model.generation_config.eos_token_id = 2 109 | self.tokenizer.bos_token_id = self.model.generation_config.bos_token_id 110 | self.tokenizer.eos_token_id = self.model.generation_config.eos_token_id 111 | self.tokenizer.pad_token_id = self.model.generation_config.pad_token_id 112 | self.tokenizer.pad_token = self.tokenizer.decode(self.tokenizer.pad_token_id) 113 | self.tokenizer.eos_token = self.tokenizer.decode(self.tokenizer.eos_token_id) 114 | self.tokenizer.bos_token = self.tokenizer.decode(self.tokenizer.bos_token_id) 115 | 116 | # Applicable to models that do not have pad tokens 117 | if self.tokenizer.pad_token is None: 118 | self.tokenizer.pad_token = self.tokenizer.eos_token 119 | self.model.generation_config.pad_token_id = self.model.generation_config.eos_token_id 120 | 121 | # Edit generation configuration based on input arguments 122 | self.generation_config = copy.deepcopy(self.model.generation_config) 123 | self.generation_config.max_new_tokens = kwargs.get("max_new_tokens", 100) 124 | self.generation_config.use_cache = kwargs.get("use_kv_cache", True) 125 | self.generation_config.static_shapes = is_optimized 126 | self.generation_config.do_sample = kwargs.get("do_sample", False) 127 | self.generation_config.num_beams = kwargs.get("num_beams", 1) 128 | self.generation_config.temperature = kwargs.get("temperature", 1.0) 129 | self.generation_config.top_p = kwargs.get("top_p", 1.0) 130 | self.generation_config.repetition_penalty = kwargs.get("repetition_penalty", 1.0) 131 | self.generation_config.num_return_sequences = kwargs.get("num_return_sequences", 1) 132 | self.generation_config.bad_words_ids = None 133 | self.generation_config.force_words_ids = None 134 | self.generation_config.ignore_eos = False 135 | 136 | # Define empty post-process params dict as there is no postprocesing 137 | self._postprocess_params = {} 138 | 139 | # Warm-up hpu and compile computation graphs 140 | self.compile_graph() 141 | 142 | def __call__(self, prompt: List[str]): 143 | """ 144 | __call__ method of pipeline class 145 | """ 146 | # Tokenize input string 147 | model_inputs = self.tokenizer.encode_plus(prompt[0], return_tensors="pt", max_length=self.max_padding_length, padding="max_length", truncation=True) 148 | 149 | # Move tensors to hpu 150 | for t in model_inputs: 151 | if 
torch.is_tensor(model_inputs[t]): 152 | model_inputs[t] = model_inputs[t].to(self.device) 153 | 154 | # Call model's generate method 155 | output = self.model.generate(**model_inputs, generation_config=self.generation_config, lazy_mode=True, hpu_graphs=True, profiling_steps=0, profiling_warmup_steps=0).cpu() 156 | 157 | # Decode and return result 158 | output_text = self.tokenizer.decode(output[0], skip_special_tokens=True) 159 | del output, model_inputs 160 | return [{"generated_text": output_text}] 161 | 162 | def compile_graph(self): 163 | """ 164 | Function to compile computation graphs and synchronize hpus. 165 | """ 166 | for _ in range(3): 167 | self(["Here is my prompt"]) 168 | torch_hpu.synchronize() 169 | -------------------------------------------------------------------------------- /ingest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed 4 | 5 | import click 6 | import torch 7 | from langchain.docstore.document import Document 8 | from langchain.text_splitter import Language, RecursiveCharacterTextSplitter 9 | from langchain.vectorstores import Chroma 10 | from utils import get_embeddings 11 | 12 | from constants import ( 13 | CHROMA_SETTINGS, 14 | DOCUMENT_MAP, 15 | EMBEDDING_MODEL_NAME, 16 | INGEST_THREADS, 17 | PERSIST_DIRECTORY, 18 | SOURCE_DIRECTORY, 19 | ) 20 | 21 | import nltk 22 | nltk.download('punkt_tab') 23 | nltk.download('averaged_perceptron_tagger_eng') 24 | 25 | def file_log(logentry): 26 | file1 = open("file_ingest.log", "a") 27 | file1.write(logentry + "\n") 28 | file1.close() 29 | print(logentry + "\n") 30 | 31 | 32 | def load_single_document(file_path: str) -> Document: 33 | # Loads a single document from a file path 34 | try: 35 | file_extension = os.path.splitext(file_path)[1] 36 | loader_class = DOCUMENT_MAP.get(file_extension) 37 | if loader_class: 38 | file_log(file_path + " loaded.") 39 | loader = loader_class(file_path) 40 | else: 41 | file_log(file_path + " document type is undefined.") 42 | raise ValueError("Document type is undefined") 43 | return loader.load()[0] 44 | except Exception as ex: 45 | file_log("%s loading error: \n%s" % (file_path, ex)) 46 | return None 47 | 48 | 49 | def load_document_batch(filepaths): 50 | logging.info("Loading document batch") 51 | # create a thread pool 52 | with ThreadPoolExecutor(len(filepaths)) as exe: 53 | # load files 54 | futures = [exe.submit(load_single_document, name) for name in filepaths] 55 | # collect data 56 | if futures is None: 57 | file_log(name + " failed to submit") 58 | return None 59 | else: 60 | data_list = [future.result() for future in futures] 61 | # return data and file paths 62 | return (data_list, filepaths) 63 | 64 | 65 | def load_documents(source_dir: str) -> list[Document]: 66 | # Loads all documents from the source documents directory, including nested folders 67 | paths = [] 68 | for root, _, files in os.walk(source_dir): 69 | for file_name in files: 70 | print("Importing: " + file_name) 71 | file_extension = os.path.splitext(file_name)[1] 72 | source_file_path = os.path.join(root, file_name) 73 | if file_extension in DOCUMENT_MAP.keys(): 74 | paths.append(source_file_path) 75 | 76 | # Have at least one worker and at most INGEST_THREADS workers 77 | n_workers = min(INGEST_THREADS, max(len(paths), 1)) 78 | chunksize = round(len(paths) / n_workers) 79 | docs = [] 80 | with ProcessPoolExecutor(n_workers) as executor: 81 | futures 
= [] 82 | # split the load operations into chunks 83 | for i in range(0, len(paths), chunksize): 84 | # select a chunk of filenames 85 | filepaths = paths[i : (i + chunksize)] 86 | # submit the task 87 | try: 88 | future = executor.submit(load_document_batch, filepaths) 89 | except Exception as ex: 90 | file_log("executor task failed: %s" % (ex)) 91 | future = None 92 | if future is not None: 93 | futures.append(future) 94 | # process all results 95 | for future in as_completed(futures): 96 | # open the file and load the data 97 | try: 98 | contents, _ = future.result() 99 | docs.extend(contents) 100 | except Exception as ex: 101 | file_log("Exception: %s" % (ex)) 102 | 103 | return docs 104 | 105 | 106 | def split_documents(documents: list[Document]) -> tuple[list[Document], list[Document]]: 107 | # Splits documents for correct Text Splitter 108 | text_docs, python_docs = [], [] 109 | for doc in documents: 110 | if doc is not None: 111 | file_extension = os.path.splitext(doc.metadata["source"])[1] 112 | if file_extension == ".py": 113 | python_docs.append(doc) 114 | else: 115 | text_docs.append(doc) 116 | return text_docs, python_docs 117 | 118 | 119 | @click.command() 120 | @click.option( 121 | "--device_type", 122 | default="cuda" if torch.cuda.is_available() else "cpu", 123 | type=click.Choice( 124 | [ 125 | "cpu", 126 | "cuda", 127 | "ipu", 128 | "xpu", 129 | "mkldnn", 130 | "opengl", 131 | "opencl", 132 | "ideep", 133 | "hip", 134 | "ve", 135 | "fpga", 136 | "ort", 137 | "xla", 138 | "lazy", 139 | "vulkan", 140 | "mps", 141 | "meta", 142 | "hpu", 143 | "mtia", 144 | ], 145 | ), 146 | help="Device to run on. (Default is cuda)", 147 | ) 148 | def main(device_type): 149 | # Load documents and split in chunks 150 | logging.info(f"Loading documents from {SOURCE_DIRECTORY}") 151 | documents = load_documents(SOURCE_DIRECTORY) 152 | text_documents, python_documents = split_documents(documents) 153 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 154 | python_splitter = RecursiveCharacterTextSplitter.from_language( 155 | language=Language.PYTHON, chunk_size=880, chunk_overlap=200 156 | ) 157 | texts = text_splitter.split_documents(text_documents) 158 | texts.extend(python_splitter.split_documents(python_documents)) 159 | logging.info(f"Loaded {len(documents)} documents from {SOURCE_DIRECTORY}") 160 | logging.info(f"Split into {len(texts)} chunks of text") 161 | 162 | """ 163 | (1) Chooses an appropriate langchain library based on the enbedding model name. Matching code is contained within fun_localGPT.py. 164 | 165 | (2) Provides additional arguments for instructor and BGE models to improve results, pursuant to the instructions contained on 166 | their respective huggingface repository, project page or github repository. 
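    (The matching logic is implemented in get_embeddings(), imported from utils.py at the
    top of this file; it builds the embeddings object for the EMBEDDING_MODEL_NAME set in
    constants.py, which is logged just below.)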
167 | """ 168 | 169 | embeddings = get_embeddings(device_type) 170 | 171 | logging.info(f"Loaded embeddings from {EMBEDDING_MODEL_NAME}") 172 | 173 | db = Chroma.from_documents( 174 | texts, 175 | embeddings, 176 | persist_directory=PERSIST_DIRECTORY, 177 | client_settings=CHROMA_SETTINGS, 178 | ) 179 | 180 | 181 | if __name__ == "__main__": 182 | logging.basicConfig( 183 | format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", level=logging.INFO 184 | ) 185 | main() 186 | -------------------------------------------------------------------------------- /load_models.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | 5 | if sys.platform != "darwin": 6 | from auto_gptq import AutoGPTQForCausalLM 7 | 8 | from huggingface_hub import hf_hub_download 9 | from langchain.llms import LlamaCpp 10 | from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, BitsAndBytesConfig 11 | 12 | from constants import CONTEXT_WINDOW_SIZE, MAX_NEW_TOKENS, MODELS_PATH, N_BATCH, N_GPU_LAYERS 13 | 14 | 15 | def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, logging): 16 | """ 17 | Load a GGUF/GGML quantized model using LlamaCpp. 18 | 19 | This function attempts to load a GGUF/GGML quantized model using the LlamaCpp library. 20 | If the model is of type GGML, and newer version of LLAMA-CPP is used which does not support GGML, 21 | it logs a message indicating that LLAMA-CPP has dropped support for GGML. 22 | 23 | Parameters: 24 | - model_id (str): The identifier for the model on HuggingFace Hub. 25 | - model_basename (str): The base name of the model file. 26 | - device_type (str): The type of device where the model will run, e.g., 'mps', 'cuda', etc. 27 | - logging (logging.Logger): Logger instance for logging messages. 28 | 29 | Returns: 30 | - LlamaCpp: An instance of the LlamaCpp model if successful, otherwise None. 31 | 32 | Notes: 33 | - The function uses the `hf_hub_download` function to download the model from the HuggingFace Hub. 34 | - The number of GPU layers is set based on the device type. 35 | """ 36 | 37 | try: 38 | logging.info("Using Llamacpp for GGUF/GGML quantized models") 39 | model_path = hf_hub_download( 40 | repo_id=model_id, 41 | filename=model_basename, 42 | resume_download=True, 43 | cache_dir=MODELS_PATH, 44 | ) 45 | kwargs = { 46 | "model_path": model_path, 47 | "n_ctx": CONTEXT_WINDOW_SIZE, 48 | "max_tokens": MAX_NEW_TOKENS, 49 | "n_batch": N_BATCH, # set this based on your GPU & CPU RAM 50 | } 51 | if device_type.lower() == "mps": 52 | kwargs["n_gpu_layers"] = 1 53 | if device_type.lower() == "cuda": 54 | kwargs["n_gpu_layers"] = N_GPU_LAYERS # set this based on your GPU 55 | 56 | return LlamaCpp(**kwargs) 57 | except TypeError: 58 | if "ggml" in model_basename: 59 | logging.INFO("If you were using GGML model, LLAMA-CPP Dropped Support, Use GGUF Instead") 60 | return None 61 | 62 | 63 | def load_quantized_model_qptq(model_id, model_basename, device_type, logging): 64 | """ 65 | Load a GPTQ quantized model using AutoGPTQForCausalLM. 66 | 67 | This function loads a quantized model that ends with GPTQ and may have variations 68 | of .no-act.order or .safetensors in their HuggingFace repo. 
69 | It will not work for Macs, as AutoGPTQ only supports Linux and Windows: 70 | - Nvidia CUDA (Windows and Linux) 71 | - AMD ROCm (Linux only) 72 | - CPU QiGen (Linux only, new and experimental) 73 | 74 | Parameters: 75 | - model_id (str): The identifier for the model on HuggingFace Hub. 76 | - model_basename (str): The base name of the model file. 77 | - device_type (str): The type of device where the model will run. 78 | - logging (logging.Logger): Logger instance for logging messages. 79 | 80 | Returns: 81 | - model (AutoGPTQForCausalLM): The loaded quantized model. 82 | - tokenizer (AutoTokenizer): The tokenizer associated with the model. 83 | 84 | Notes: 85 | - The function checks for the ".safetensors" ending in the model_basename and removes it if present. 86 | """ 87 | 88 | if sys.platform == "darwin": 89 | logging.INFO("GPTQ models will NOT work on Mac devices. Please choose a different model.") 90 | return None, None 91 | 92 | # The code supports all huggingface models that ends with GPTQ and have some variation 93 | # of .no-act.order or .safetensors in their HF repo. 94 | logging.info("Using AutoGPTQForCausalLM for quantized models") 95 | 96 | if ".safetensors" in model_basename: 97 | # Remove the ".safetensors" ending if present 98 | model_basename = model_basename.replace(".safetensors", "") 99 | 100 | tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True) 101 | logging.info("Tokenizer loaded") 102 | 103 | model = AutoGPTQForCausalLM.from_quantized( 104 | model_id, 105 | model_basename=model_basename, 106 | use_safetensors=True, 107 | trust_remote_code=True, 108 | device_map="auto", 109 | use_triton=False, 110 | quantize_config=None, 111 | ) 112 | return model, tokenizer 113 | 114 | 115 | def load_full_model(model_id, model_basename, device_type, logging): 116 | """ 117 | Load a full model using either LlamaTokenizer or AutoModelForCausalLM. 118 | 119 | This function loads a full model based on the specified device type. 120 | If the device type is 'mps' or 'cpu', it uses LlamaTokenizer and LlamaForCausalLM. 121 | Otherwise, it uses AutoModelForCausalLM. 122 | 123 | Parameters: 124 | - model_id (str): The identifier for the model on HuggingFace Hub. 125 | - model_basename (str): The base name of the model file. 126 | - device_type (str): The type of device where the model will run. 127 | - logging (logging.Logger): Logger instance for logging messages. 128 | 129 | Returns: 130 | - model (Union[LlamaForCausalLM, AutoModelForCausalLM]): The loaded model. 131 | - tokenizer (Union[LlamaTokenizer, AutoTokenizer]): The tokenizer associated with the model. 132 | 133 | Notes: 134 | - The function uses the `from_pretrained` method to load both the model and the tokenizer. 135 | - Additional settings are provided for NVIDIA GPUs, such as loading in 4-bit and setting the compute dtype. 
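    - Concretely, the CUDA path below uses a BitsAndBytesConfig with 4-bit NF4 quantization,
      double quantization and a float16 compute dtype, while "mps", "cpu" and "hpu" load the
      model unquantized in bfloat16 with device_map="auto".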
136 | """ 137 | 138 | if device_type.lower() in ["mps", "cpu", "hpu"]: 139 | logging.info("Using AutoModelForCausalLM") 140 | # tokenizer = LlamaTokenizer.from_pretrained(model_id, cache_dir="./models/") 141 | # model = LlamaForCausalLM.from_pretrained(model_id, cache_dir="./models/") 142 | 143 | model = AutoModelForCausalLM.from_pretrained(model_id, 144 | # quantization_config=quantization_config, 145 | # low_cpu_mem_usage=True, 146 | # torch_dtype="auto", 147 | torch_dtype=torch.bfloat16, 148 | device_map="auto", 149 | cache_dir="./models/") 150 | 151 | tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir="./models/") 152 | else: 153 | logging.info("Using AutoModelForCausalLM for full models") 154 | tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir="./models/") 155 | logging.info("Tokenizer loaded") 156 | bnb_config = BitsAndBytesConfig( 157 | load_in_4bit=True, 158 | bnb_4bit_use_double_quant=True, 159 | bnb_4bit_quant_type="nf4", 160 | bnb_4bit_compute_dtype=torch.float16 161 | ) 162 | model = AutoModelForCausalLM.from_pretrained( 163 | model_id, 164 | device_map="auto", 165 | torch_dtype=torch.float16, 166 | low_cpu_mem_usage=True, 167 | cache_dir=MODELS_PATH, 168 | trust_remote_code=True, # set these if you are using NVIDIA GPU 169 | quantization_config=bnb_config 170 | # load_in_4bit=True, 171 | # bnb_4bit_quant_type="nf4", 172 | # bnb_4bit_compute_dtype=torch.float16, 173 | # max_memory={0: "15GB"}, # Uncomment this line with you encounter CUDA out of memory errors 174 | ) 175 | 176 | model.tie_weights() 177 | return model, tokenizer 178 | 179 | 180 | def load_quantized_model_awq(model_id, logging): 181 | """ 182 | Load a AWQ quantized model using AutoModelForCausalLM. 183 | 184 | This function loads a quantized model that ends with AWQ. 185 | It will not work for Macs as AutoAWQ currently only supports Nvidia GPUs. 186 | 187 | Parameters: 188 | - model_id (str): The identifier for the model on HuggingFace Hub. 189 | - logging (logging.Logger): Logger instance for logging messages. 190 | 191 | Returns: 192 | - model (AutoModelForCausalLM): The loaded quantized model. 193 | - tokenizer (AutoTokenizer): The tokenizer associated with the model. 194 | 195 | """ 196 | 197 | if sys.platform == "darwin": 198 | logging.INFO("AWQ models will NOT work on Mac devices. Please choose a different model.") 199 | return None, None 200 | 201 | # The code supports all huggingface models that ends with AWQ. 
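    # Example pairing from constants.py: MODEL_ID = "TheBloke/Llama-2-7B-Chat-AWQ" with a
    # MODEL_BASENAME that contains ".awq" (the basename only routes model loading here);
    # compute capability 7.5 (sm75) and CUDA Toolkit 11.8+ are required.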
202 | logging.info("Using AutoModelForCausalLM for AWQ quantized models") 203 | 204 | tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True) 205 | logging.info("Tokenizer loaded") 206 | 207 | model = AutoModelForCausalLM.from_pretrained( 208 | model_id, 209 | use_safetensors=True, 210 | trust_remote_code=True, 211 | device_map="auto", 212 | ) 213 | return model, tokenizer 214 | -------------------------------------------------------------------------------- /localGPTUI/localGPTUI.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | import tempfile 5 | 6 | import requests 7 | from flask import Flask, render_template, request 8 | from werkzeug.utils import secure_filename 9 | 10 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 11 | 12 | app = Flask(__name__) 13 | app.secret_key = "LeafmanZSecretKey" 14 | 15 | API_HOST = "http://localhost:5110/api" 16 | 17 | 18 | # PAGES # 19 | @app.route("/", methods=["GET", "POST"]) 20 | def home_page(): 21 | if request.method == "POST": 22 | if "user_prompt" in request.form: 23 | user_prompt = request.form["user_prompt"] 24 | print(f"User Prompt: {user_prompt}") 25 | 26 | main_prompt_url = f"{API_HOST}/prompt_route" 27 | response = requests.post(main_prompt_url, data={"user_prompt": user_prompt}) 28 | print(response.status_code) # print HTTP response status code for debugging 29 | if response.status_code == 200: 30 | # print(response.json()) # Print the JSON data from the response 31 | return render_template("home.html", show_response_modal=True, response_dict=response.json()) 32 | elif "documents" in request.files: 33 | delete_source_url = f"{API_HOST}/delete_source" # URL of the /api/delete_source endpoint 34 | if request.form.get("action") == "reset": 35 | response = requests.get(delete_source_url) 36 | 37 | save_document_url = f"{API_HOST}/save_document" 38 | run_ingest_url = f"{API_HOST}/run_ingest" # URL of the /api/run_ingest endpoint 39 | files = request.files.getlist("documents") 40 | for file in files: 41 | print(file.filename) 42 | filename = secure_filename(file.filename) 43 | with tempfile.SpooledTemporaryFile() as f: 44 | f.write(file.read()) 45 | f.seek(0) 46 | response = requests.post(save_document_url, files={"document": (filename, f)}) 47 | print(response.status_code) # print HTTP response status code for debugging 48 | # Make a GET request to the /api/run_ingest endpoint 49 | response = requests.get(run_ingest_url) 50 | print(response.status_code) # print HTTP response status code for debugging 51 | 52 | # Display the form for GET request 53 | return render_template( 54 | "home.html", 55 | show_response_modal=False, 56 | response_dict={"Prompt": "None", "Answer": "None", "Sources": [("ewf", "wef")]}, 57 | ) 58 | 59 | 60 | if __name__ == "__main__": 61 | parser = argparse.ArgumentParser() 62 | parser.add_argument("--port", type=int, default=5111, help="Port to run the UI on. Defaults to 5111.") 63 | parser.add_argument( 64 | "--host", 65 | type=str, 66 | default="127.0.0.1", 67 | help="Host to run the UI on. Defaults to 127.0.0.1. 
" 68 | "Set to 0.0.0.0 to make the UI externally " 69 | "accessible from other devices.", 70 | ) 71 | args = parser.parse_args() 72 | app.run(debug=False, host=args.host, port=args.port) 73 | -------------------------------------------------------------------------------- /localGPTUI/static/dependencies/bootstrap-5.1.3-dist/css/bootstrap-reboot.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Reboot v5.1.3 (https://getbootstrap.com/) 3 | * Copyright 2011-2021 The Bootstrap Authors 4 | * Copyright 2011-2021 Twitter, Inc. 5 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) 6 | * Forked from Normalize.css, licensed MIT (https://github.com/necolas/normalize.css/blob/master/LICENSE.md) 7 | */ 8 | :root { 9 | --bs-blue: #0d6efd; 10 | --bs-indigo: #6610f2; 11 | --bs-purple: #6f42c1; 12 | --bs-pink: #d63384; 13 | --bs-red: #dc3545; 14 | --bs-orange: #fd7e14; 15 | --bs-yellow: #ffc107; 16 | --bs-green: #198754; 17 | --bs-teal: #20c997; 18 | --bs-cyan: #0dcaf0; 19 | --bs-white: #fff; 20 | --bs-gray: #6c757d; 21 | --bs-gray-dark: #343a40; 22 | --bs-gray-100: #f8f9fa; 23 | --bs-gray-200: #e9ecef; 24 | --bs-gray-300: #dee2e6; 25 | --bs-gray-400: #ced4da; 26 | --bs-gray-500: #adb5bd; 27 | --bs-gray-600: #6c757d; 28 | --bs-gray-700: #495057; 29 | --bs-gray-800: #343a40; 30 | --bs-gray-900: #212529; 31 | --bs-primary: #0d6efd; 32 | --bs-secondary: #6c757d; 33 | --bs-success: #198754; 34 | --bs-info: #0dcaf0; 35 | --bs-warning: #ffc107; 36 | --bs-danger: #dc3545; 37 | --bs-light: #f8f9fa; 38 | --bs-dark: #212529; 39 | --bs-primary-rgb: 13, 110, 253; 40 | --bs-secondary-rgb: 108, 117, 125; 41 | --bs-success-rgb: 25, 135, 84; 42 | --bs-info-rgb: 13, 202, 240; 43 | --bs-warning-rgb: 255, 193, 7; 44 | --bs-danger-rgb: 220, 53, 69; 45 | --bs-light-rgb: 248, 249, 250; 46 | --bs-dark-rgb: 33, 37, 41; 47 | --bs-white-rgb: 255, 255, 255; 48 | --bs-black-rgb: 0, 0, 0; 49 | --bs-body-color-rgb: 33, 37, 41; 50 | --bs-body-bg-rgb: 255, 255, 255; 51 | --bs-font-sans-serif: system-ui, -apple-system, "Segoe UI", Roboto, 52 | "Helvetica Neue", Arial, "Noto Sans", "Liberation Sans", sans-serif, 53 | "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; 54 | --bs-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, 55 | "Liberation Mono", "Courier New", monospace; 56 | --bs-gradient: linear-gradient( 57 | 180deg, 58 | rgba(255, 255, 255, 0.15), 59 | rgba(255, 255, 255, 0) 60 | ); 61 | --bs-body-font-family: var(--bs-font-sans-serif); 62 | --bs-body-font-size: 1rem; 63 | --bs-body-font-weight: 400; 64 | --bs-body-line-height: 1.5; 65 | --bs-body-color: #212529; 66 | --bs-body-bg: #fff; 67 | } 68 | 69 | *, 70 | *::before, 71 | *::after { 72 | box-sizing: border-box; 73 | } 74 | 75 | @media (prefers-reduced-motion: no-preference) { 76 | :root { 77 | scroll-behavior: smooth; 78 | } 79 | } 80 | 81 | body { 82 | margin: 0; 83 | font-family: var(--bs-body-font-family); 84 | font-size: var(--bs-body-font-size); 85 | font-weight: var(--bs-body-font-weight); 86 | line-height: var(--bs-body-line-height); 87 | color: var(--bs-body-color); 88 | text-align: var(--bs-body-text-align); 89 | background-color: var(--bs-body-bg); 90 | -webkit-text-size-adjust: 100%; 91 | -webkit-tap-highlight-color: rgba(0, 0, 0, 0); 92 | } 93 | 94 | hr { 95 | margin: 1rem 0; 96 | color: inherit; 97 | background-color: currentColor; 98 | border: 0; 99 | opacity: 0.25; 100 | } 101 | 102 | hr:not([size]) { 103 | height: 1px; 104 | } 105 | 106 | h6, 
107 | h5, 108 | h4, 109 | h3, 110 | h2, 111 | h1 { 112 | margin-top: 0; 113 | margin-bottom: 0.5rem; 114 | font-weight: 500; 115 | line-height: 1.2; 116 | } 117 | 118 | h1 { 119 | font-size: calc(1.375rem + 1.5vw); 120 | } 121 | @media (min-width: 1200px) { 122 | h1 { 123 | font-size: 2.5rem; 124 | } 125 | } 126 | 127 | h2 { 128 | font-size: calc(1.325rem + 0.9vw); 129 | } 130 | @media (min-width: 1200px) { 131 | h2 { 132 | font-size: 2rem; 133 | } 134 | } 135 | 136 | h3 { 137 | font-size: calc(1.3rem + 0.6vw); 138 | } 139 | @media (min-width: 1200px) { 140 | h3 { 141 | font-size: 1.75rem; 142 | } 143 | } 144 | 145 | h4 { 146 | font-size: calc(1.275rem + 0.3vw); 147 | } 148 | @media (min-width: 1200px) { 149 | h4 { 150 | font-size: 1.5rem; 151 | } 152 | } 153 | 154 | h5 { 155 | font-size: 1.25rem; 156 | } 157 | 158 | h6 { 159 | font-size: 1rem; 160 | } 161 | 162 | p { 163 | margin-top: 0; 164 | margin-bottom: 1rem; 165 | } 166 | 167 | abbr[title], 168 | abbr[data-bs-original-title] { 169 | -webkit-text-decoration: underline dotted; 170 | text-decoration: underline dotted; 171 | cursor: help; 172 | -webkit-text-decoration-skip-ink: none; 173 | text-decoration-skip-ink: none; 174 | } 175 | 176 | address { 177 | margin-bottom: 1rem; 178 | font-style: normal; 179 | line-height: inherit; 180 | } 181 | 182 | ol, 183 | ul { 184 | padding-left: 2rem; 185 | } 186 | 187 | ol, 188 | ul, 189 | dl { 190 | margin-top: 0; 191 | margin-bottom: 1rem; 192 | } 193 | 194 | ol ol, 195 | ul ul, 196 | ol ul, 197 | ul ol { 198 | margin-bottom: 0; 199 | } 200 | 201 | dt { 202 | font-weight: 700; 203 | } 204 | 205 | dd { 206 | margin-bottom: 0.5rem; 207 | margin-left: 0; 208 | } 209 | 210 | blockquote { 211 | margin: 0 0 1rem; 212 | } 213 | 214 | b, 215 | strong { 216 | font-weight: bolder; 217 | } 218 | 219 | small { 220 | font-size: 0.875em; 221 | } 222 | 223 | mark { 224 | padding: 0.2em; 225 | background-color: #fcf8e3; 226 | } 227 | 228 | sub, 229 | sup { 230 | position: relative; 231 | font-size: 0.75em; 232 | line-height: 0; 233 | vertical-align: baseline; 234 | } 235 | 236 | sub { 237 | bottom: -0.25em; 238 | } 239 | 240 | sup { 241 | top: -0.5em; 242 | } 243 | 244 | a { 245 | color: #0d6efd; 246 | text-decoration: underline; 247 | } 248 | a:hover { 249 | color: #0a58ca; 250 | } 251 | 252 | a:not([href]):not([class]), 253 | a:not([href]):not([class]):hover { 254 | color: inherit; 255 | text-decoration: none; 256 | } 257 | 258 | pre, 259 | code, 260 | kbd, 261 | samp { 262 | font-family: var(--bs-font-monospace); 263 | font-size: 1em; 264 | direction: ltr /* rtl:ignore */; 265 | unicode-bidi: bidi-override; 266 | } 267 | 268 | pre { 269 | display: block; 270 | margin-top: 0; 271 | margin-bottom: 1rem; 272 | overflow: auto; 273 | font-size: 0.875em; 274 | } 275 | pre code { 276 | font-size: inherit; 277 | color: inherit; 278 | word-break: normal; 279 | } 280 | 281 | code { 282 | font-size: 0.875em; 283 | color: #d63384; 284 | word-wrap: break-word; 285 | } 286 | a > code { 287 | color: inherit; 288 | } 289 | 290 | kbd { 291 | padding: 0.2rem 0.4rem; 292 | font-size: 0.875em; 293 | color: #fff; 294 | background-color: #212529; 295 | border-radius: 0.2rem; 296 | } 297 | kbd kbd { 298 | padding: 0; 299 | font-size: 1em; 300 | font-weight: 700; 301 | } 302 | 303 | figure { 304 | margin: 0 0 1rem; 305 | } 306 | 307 | img, 308 | svg { 309 | vertical-align: middle; 310 | } 311 | 312 | table { 313 | caption-side: bottom; 314 | border-collapse: collapse; 315 | } 316 | 317 | caption { 318 | padding-top: 0.5rem; 319 | 
padding-bottom: 0.5rem; 320 | color: #6c757d; 321 | text-align: left; 322 | } 323 | 324 | th { 325 | text-align: inherit; 326 | text-align: -webkit-match-parent; 327 | } 328 | 329 | thead, 330 | tbody, 331 | tfoot, 332 | tr, 333 | td, 334 | th { 335 | border-color: inherit; 336 | border-style: solid; 337 | border-width: 0; 338 | } 339 | 340 | label { 341 | display: inline-block; 342 | } 343 | 344 | button { 345 | border-radius: 0; 346 | } 347 | 348 | button:focus:not(:focus-visible) { 349 | outline: 0; 350 | } 351 | 352 | input, 353 | button, 354 | select, 355 | optgroup, 356 | textarea { 357 | margin: 0; 358 | font-family: inherit; 359 | font-size: inherit; 360 | line-height: inherit; 361 | } 362 | 363 | button, 364 | select { 365 | text-transform: none; 366 | } 367 | 368 | [role="button"] { 369 | cursor: pointer; 370 | } 371 | 372 | select { 373 | word-wrap: normal; 374 | } 375 | select:disabled { 376 | opacity: 1; 377 | } 378 | 379 | [list]::-webkit-calendar-picker-indicator { 380 | display: none; 381 | } 382 | 383 | button, 384 | [type="button"], 385 | [type="reset"], 386 | [type="submit"] { 387 | -webkit-appearance: button; 388 | } 389 | button:not(:disabled), 390 | [type="button"]:not(:disabled), 391 | [type="reset"]:not(:disabled), 392 | [type="submit"]:not(:disabled) { 393 | cursor: pointer; 394 | } 395 | 396 | ::-moz-focus-inner { 397 | padding: 0; 398 | border-style: none; 399 | } 400 | 401 | textarea { 402 | resize: vertical; 403 | } 404 | 405 | fieldset { 406 | min-width: 0; 407 | padding: 0; 408 | margin: 0; 409 | border: 0; 410 | } 411 | 412 | legend { 413 | float: left; 414 | width: 100%; 415 | padding: 0; 416 | margin-bottom: 0.5rem; 417 | font-size: calc(1.275rem + 0.3vw); 418 | line-height: inherit; 419 | } 420 | @media (min-width: 1200px) { 421 | legend { 422 | font-size: 1.5rem; 423 | } 424 | } 425 | legend + * { 426 | clear: left; 427 | } 428 | 429 | ::-webkit-datetime-edit-fields-wrapper, 430 | ::-webkit-datetime-edit-text, 431 | ::-webkit-datetime-edit-minute, 432 | ::-webkit-datetime-edit-hour-field, 433 | ::-webkit-datetime-edit-day-field, 434 | ::-webkit-datetime-edit-month-field, 435 | ::-webkit-datetime-edit-year-field { 436 | padding: 0; 437 | } 438 | 439 | ::-webkit-inner-spin-button { 440 | height: auto; 441 | } 442 | 443 | [type="search"] { 444 | outline-offset: -2px; 445 | -webkit-appearance: textfield; 446 | } 447 | 448 | /* rtl:raw: 449 | [type="tel"], 450 | [type="url"], 451 | [type="email"], 452 | [type="number"] { 453 | direction: ltr; 454 | } 455 | */ 456 | ::-webkit-search-decoration { 457 | -webkit-appearance: none; 458 | } 459 | 460 | ::-webkit-color-swatch-wrapper { 461 | padding: 0; 462 | } 463 | 464 | ::-webkit-file-upload-button { 465 | font: inherit; 466 | } 467 | 468 | ::file-selector-button { 469 | font: inherit; 470 | } 471 | 472 | ::-webkit-file-upload-button { 473 | font: inherit; 474 | -webkit-appearance: button; 475 | } 476 | 477 | output { 478 | display: inline-block; 479 | } 480 | 481 | iframe { 482 | border: 0; 483 | } 484 | 485 | summary { 486 | display: list-item; 487 | cursor: pointer; 488 | } 489 | 490 | progress { 491 | vertical-align: baseline; 492 | } 493 | 494 | [hidden] { 495 | display: none !important; 496 | } 497 | 498 | /*# sourceMappingURL=bootstrap-reboot.css.map */ 499 | -------------------------------------------------------------------------------- /localGPTUI/static/dependencies/bootstrap-5.1.3-dist/css/bootstrap-reboot.min.css: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Bootstrap Reboot v5.1.3 (https://getbootstrap.com/) 3 | * Copyright 2011-2021 The Bootstrap Authors 4 | * Copyright 2011-2021 Twitter, Inc. 5 | * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE) 6 | * Forked from Normalize.css, licensed MIT (https://github.com/necolas/normalize.css/blob/master/LICENSE.md) 7 | */ 8 | :root { 9 | --bs-blue: #0d6efd; 10 | --bs-indigo: #6610f2; 11 | --bs-purple: #6f42c1; 12 | --bs-pink: #d63384; 13 | --bs-red: #dc3545; 14 | --bs-orange: #fd7e14; 15 | --bs-yellow: #ffc107; 16 | --bs-green: #198754; 17 | --bs-teal: #20c997; 18 | --bs-cyan: #0dcaf0; 19 | --bs-white: #fff; 20 | --bs-gray: #6c757d; 21 | --bs-gray-dark: #343a40; 22 | --bs-gray-100: #f8f9fa; 23 | --bs-gray-200: #e9ecef; 24 | --bs-gray-300: #dee2e6; 25 | --bs-gray-400: #ced4da; 26 | --bs-gray-500: #adb5bd; 27 | --bs-gray-600: #6c757d; 28 | --bs-gray-700: #495057; 29 | --bs-gray-800: #343a40; 30 | --bs-gray-900: #212529; 31 | --bs-primary: #0d6efd; 32 | --bs-secondary: #6c757d; 33 | --bs-success: #198754; 34 | --bs-info: #0dcaf0; 35 | --bs-warning: #ffc107; 36 | --bs-danger: #dc3545; 37 | --bs-light: #f8f9fa; 38 | --bs-dark: #212529; 39 | --bs-primary-rgb: 13, 110, 253; 40 | --bs-secondary-rgb: 108, 117, 125; 41 | --bs-success-rgb: 25, 135, 84; 42 | --bs-info-rgb: 13, 202, 240; 43 | --bs-warning-rgb: 255, 193, 7; 44 | --bs-danger-rgb: 220, 53, 69; 45 | --bs-light-rgb: 248, 249, 250; 46 | --bs-dark-rgb: 33, 37, 41; 47 | --bs-white-rgb: 255, 255, 255; 48 | --bs-black-rgb: 0, 0, 0; 49 | --bs-body-color-rgb: 33, 37, 41; 50 | --bs-body-bg-rgb: 255, 255, 255; 51 | --bs-font-sans-serif: system-ui, -apple-system, "Segoe UI", Roboto, 52 | "Helvetica Neue", Arial, "Noto Sans", "Liberation Sans", sans-serif, 53 | "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; 54 | --bs-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, 55 | "Liberation Mono", "Courier New", monospace; 56 | --bs-gradient: linear-gradient( 57 | 180deg, 58 | rgba(255, 255, 255, 0.15), 59 | rgba(255, 255, 255, 0) 60 | ); 61 | --bs-body-font-family: var(--bs-font-sans-serif); 62 | --bs-body-font-size: 1rem; 63 | --bs-body-font-weight: 400; 64 | --bs-body-line-height: 1.5; 65 | --bs-body-color: #212529; 66 | --bs-body-bg: #fff; 67 | } 68 | *, 69 | ::after, 70 | ::before { 71 | box-sizing: border-box; 72 | } 73 | @media (prefers-reduced-motion: no-preference) { 74 | :root { 75 | scroll-behavior: smooth; 76 | } 77 | } 78 | body { 79 | margin: 0; 80 | font-family: var(--bs-body-font-family); 81 | font-size: var(--bs-body-font-size); 82 | font-weight: var(--bs-body-font-weight); 83 | line-height: var(--bs-body-line-height); 84 | color: var(--bs-body-color); 85 | text-align: var(--bs-body-text-align); 86 | background-color: var(--bs-body-bg); 87 | -webkit-text-size-adjust: 100%; 88 | -webkit-tap-highlight-color: transparent; 89 | } 90 | hr { 91 | margin: 1rem 0; 92 | color: inherit; 93 | background-color: currentColor; 94 | border: 0; 95 | opacity: 0.25; 96 | } 97 | hr:not([size]) { 98 | height: 1px; 99 | } 100 | h1, 101 | h2, 102 | h3, 103 | h4, 104 | h5, 105 | h6 { 106 | margin-top: 0; 107 | margin-bottom: 0.5rem; 108 | font-weight: 500; 109 | line-height: 1.2; 110 | } 111 | h1 { 112 | font-size: calc(1.375rem + 1.5vw); 113 | } 114 | @media (min-width: 1200px) { 115 | h1 { 116 | font-size: 2.5rem; 117 | } 118 | } 119 | h2 { 120 | font-size: calc(1.325rem + 0.9vw); 121 | } 122 | @media (min-width: 1200px) { 123 | h2 { 124 | font-size: 2rem; 125 | } 126 | } 127 | h3 { 128 | font-size: 
calc(1.3rem + 0.6vw); 129 | } 130 | @media (min-width: 1200px) { 131 | h3 { 132 | font-size: 1.75rem; 133 | } 134 | } 135 | h4 { 136 | font-size: calc(1.275rem + 0.3vw); 137 | } 138 | @media (min-width: 1200px) { 139 | h4 { 140 | font-size: 1.5rem; 141 | } 142 | } 143 | h5 { 144 | font-size: 1.25rem; 145 | } 146 | h6 { 147 | font-size: 1rem; 148 | } 149 | p { 150 | margin-top: 0; 151 | margin-bottom: 1rem; 152 | } 153 | abbr[data-bs-original-title], 154 | abbr[title] { 155 | -webkit-text-decoration: underline dotted; 156 | text-decoration: underline dotted; 157 | cursor: help; 158 | -webkit-text-decoration-skip-ink: none; 159 | text-decoration-skip-ink: none; 160 | } 161 | address { 162 | margin-bottom: 1rem; 163 | font-style: normal; 164 | line-height: inherit; 165 | } 166 | ol, 167 | ul { 168 | padding-left: 2rem; 169 | } 170 | dl, 171 | ol, 172 | ul { 173 | margin-top: 0; 174 | margin-bottom: 1rem; 175 | } 176 | ol ol, 177 | ol ul, 178 | ul ol, 179 | ul ul { 180 | margin-bottom: 0; 181 | } 182 | dt { 183 | font-weight: 700; 184 | } 185 | dd { 186 | margin-bottom: 0.5rem; 187 | margin-left: 0; 188 | } 189 | blockquote { 190 | margin: 0 0 1rem; 191 | } 192 | b, 193 | strong { 194 | font-weight: bolder; 195 | } 196 | small { 197 | font-size: 0.875em; 198 | } 199 | mark { 200 | padding: 0.2em; 201 | background-color: #fcf8e3; 202 | } 203 | sub, 204 | sup { 205 | position: relative; 206 | font-size: 0.75em; 207 | line-height: 0; 208 | vertical-align: baseline; 209 | } 210 | sub { 211 | bottom: -0.25em; 212 | } 213 | sup { 214 | top: -0.5em; 215 | } 216 | a { 217 | color: #0d6efd; 218 | text-decoration: underline; 219 | } 220 | a:hover { 221 | color: #0a58ca; 222 | } 223 | a:not([href]):not([class]), 224 | a:not([href]):not([class]):hover { 225 | color: inherit; 226 | text-decoration: none; 227 | } 228 | code, 229 | kbd, 230 | pre, 231 | samp { 232 | font-family: var(--bs-font-monospace); 233 | font-size: 1em; 234 | direction: ltr; 235 | unicode-bidi: bidi-override; 236 | } 237 | pre { 238 | display: block; 239 | margin-top: 0; 240 | margin-bottom: 1rem; 241 | overflow: auto; 242 | font-size: 0.875em; 243 | } 244 | pre code { 245 | font-size: inherit; 246 | color: inherit; 247 | word-break: normal; 248 | } 249 | code { 250 | font-size: 0.875em; 251 | color: #d63384; 252 | word-wrap: break-word; 253 | } 254 | a > code { 255 | color: inherit; 256 | } 257 | kbd { 258 | padding: 0.2rem 0.4rem; 259 | font-size: 0.875em; 260 | color: #fff; 261 | background-color: #212529; 262 | border-radius: 0.2rem; 263 | } 264 | kbd kbd { 265 | padding: 0; 266 | font-size: 1em; 267 | font-weight: 700; 268 | } 269 | figure { 270 | margin: 0 0 1rem; 271 | } 272 | img, 273 | svg { 274 | vertical-align: middle; 275 | } 276 | table { 277 | caption-side: bottom; 278 | border-collapse: collapse; 279 | } 280 | caption { 281 | padding-top: 0.5rem; 282 | padding-bottom: 0.5rem; 283 | color: #6c757d; 284 | text-align: left; 285 | } 286 | th { 287 | text-align: inherit; 288 | text-align: -webkit-match-parent; 289 | } 290 | tbody, 291 | td, 292 | tfoot, 293 | th, 294 | thead, 295 | tr { 296 | border-color: inherit; 297 | border-style: solid; 298 | border-width: 0; 299 | } 300 | label { 301 | display: inline-block; 302 | } 303 | button { 304 | border-radius: 0; 305 | } 306 | button:focus:not(:focus-visible) { 307 | outline: 0; 308 | } 309 | button, 310 | input, 311 | optgroup, 312 | select, 313 | textarea { 314 | margin: 0; 315 | font-family: inherit; 316 | font-size: inherit; 317 | line-height: inherit; 318 | } 319 | button, 
320 | select { 321 | text-transform: none; 322 | } 323 | [role="button"] { 324 | cursor: pointer; 325 | } 326 | select { 327 | word-wrap: normal; 328 | } 329 | select:disabled { 330 | opacity: 1; 331 | } 332 | [list]::-webkit-calendar-picker-indicator { 333 | display: none; 334 | } 335 | [type="button"], 336 | [type="reset"], 337 | [type="submit"], 338 | button { 339 | -webkit-appearance: button; 340 | } 341 | [type="button"]:not(:disabled), 342 | [type="reset"]:not(:disabled), 343 | [type="submit"]:not(:disabled), 344 | button:not(:disabled) { 345 | cursor: pointer; 346 | } 347 | ::-moz-focus-inner { 348 | padding: 0; 349 | border-style: none; 350 | } 351 | textarea { 352 | resize: vertical; 353 | } 354 | fieldset { 355 | min-width: 0; 356 | padding: 0; 357 | margin: 0; 358 | border: 0; 359 | } 360 | legend { 361 | float: left; 362 | width: 100%; 363 | padding: 0; 364 | margin-bottom: 0.5rem; 365 | font-size: calc(1.275rem + 0.3vw); 366 | line-height: inherit; 367 | } 368 | @media (min-width: 1200px) { 369 | legend { 370 | font-size: 1.5rem; 371 | } 372 | } 373 | legend + * { 374 | clear: left; 375 | } 376 | ::-webkit-datetime-edit-day-field, 377 | ::-webkit-datetime-edit-fields-wrapper, 378 | ::-webkit-datetime-edit-hour-field, 379 | ::-webkit-datetime-edit-minute, 380 | ::-webkit-datetime-edit-month-field, 381 | ::-webkit-datetime-edit-text, 382 | ::-webkit-datetime-edit-year-field { 383 | padding: 0; 384 | } 385 | ::-webkit-inner-spin-button { 386 | height: auto; 387 | } 388 | [type="search"] { 389 | outline-offset: -2px; 390 | -webkit-appearance: textfield; 391 | } 392 | ::-webkit-search-decoration { 393 | -webkit-appearance: none; 394 | } 395 | ::-webkit-color-swatch-wrapper { 396 | padding: 0; 397 | } 398 | ::-webkit-file-upload-button { 399 | font: inherit; 400 | } 401 | ::file-selector-button { 402 | font: inherit; 403 | } 404 | ::-webkit-file-upload-button { 405 | font: inherit; 406 | -webkit-appearance: button; 407 | } 408 | output { 409 | display: inline-block; 410 | } 411 | iframe { 412 | border: 0; 413 | } 414 | summary { 415 | display: list-item; 416 | cursor: pointer; 417 | } 418 | progress { 419 | vertical-align: baseline; 420 | } 421 | [hidden] { 422 | display: none !important; 423 | } 424 | /*# sourceMappingURL=bootstrap-reboot.min.css.map */ 425 | -------------------------------------------------------------------------------- /localGPTUI/static/dependencies/bootstrap-5.1.3-dist/css/bootstrap-reboot.min.css.map: -------------------------------------------------------------------------------- 1 | 
{"version":3,"sources":["../../scss/bootstrap-reboot.scss","../../scss/_root.scss","../../scss/_reboot.scss","dist/css/bootstrap-reboot.css","../../scss/vendor/_rfs.scss","../../scss/mixins/_border-radius.scss"],"names":[],"mappings":"AAAA;;;;;;ACAA,MAQI,UAAA,QAAA,YAAA,QAAA,YAAA,QAAA,UAAA,QAAA,SAAA,QAAA,YAAA,QAAA,YAAA,QAAA,WAAA,QAAA,UAAA,QAAA,UAAA,QAAA,WAAA,KAAA,UAAA,QAAA,eAAA,QAIA,cAAA,QAAA,cAAA,QAAA,cAAA,QAAA,cAAA,QAAA,cAAA,QAAA,cAAA,QAAA,cAAA,QAAA,cAAA,QAAA,cAAA,QAIA,aAAA,QAAA,eAAA,QAAA,aAAA,QAAA,UAAA,QAAA,aAAA,QAAA,YAAA,QAAA,WAAA,QAAA,UAAA,QAIA,iBAAA,EAAA,CAAA,GAAA,CAAA,IAAA,mBAAA,GAAA,CAAA,GAAA,CAAA,IAAA,iBAAA,EAAA,CAAA,GAAA,CAAA,GAAA,cAAA,EAAA,CAAA,GAAA,CAAA,IAAA,iBAAA,GAAA,CAAA,GAAA,CAAA,EAAA,gBAAA,GAAA,CAAA,EAAA,CAAA,GAAA,eAAA,GAAA,CAAA,GAAA,CAAA,IAAA,cAAA,EAAA,CAAA,EAAA,CAAA,GAGF,eAAA,GAAA,CAAA,GAAA,CAAA,IACA,eAAA,CAAA,CAAA,CAAA,CAAA,EACA,oBAAA,EAAA,CAAA,EAAA,CAAA,GACA,iBAAA,GAAA,CAAA,GAAA,CAAA,IAMA,qBAAA,SAAA,CAAA,aAAA,CAAA,UAAA,CAAA,MAAA,CAAA,gBAAA,CAAA,KAAA,CAAA,WAAA,CAAA,iBAAA,CAAA,UAAA,CAAA,mBAAA,CAAA,gBAAA,CAAA,iBAAA,CAAA,mBACA,oBAAA,cAAA,CAAA,KAAA,CAAA,MAAA,CAAA,QAAA,CAAA,iBAAA,CAAA,aAAA,CAAA,UACA,cAAA,2EAQA,sBAAA,0BACA,oBAAA,KACA,sBAAA,IACA,sBAAA,IACA,gBAAA,QAIA,aAAA,KCnCF,ECgDA,QADA,SD5CE,WAAA,WAeE,8CANJ,MAOM,gBAAA,QAcN,KACE,OAAA,EACA,YAAA,2BEmPI,UAAA,yBFjPJ,YAAA,2BACA,YAAA,2BACA,MAAA,qBACA,WAAA,0BACA,iBAAA,kBACA,yBAAA,KACA,4BAAA,YAUF,GACE,OAAA,KAAA,EACA,MAAA,QACA,iBAAA,aACA,OAAA,EACA,QAAA,IAGF,eACE,OAAA,IAUF,GAAA,GAAA,GAAA,GAAA,GAAA,GACE,WAAA,EACA,cAAA,MAGA,YAAA,IACA,YAAA,IAIF,GEwMQ,UAAA,uBAlKJ,0BFtCJ,GE+MQ,UAAA,QF1MR,GEmMQ,UAAA,sBAlKJ,0BFjCJ,GE0MQ,UAAA,MFrMR,GE8LQ,UAAA,oBAlKJ,0BF5BJ,GEqMQ,UAAA,SFhMR,GEyLQ,UAAA,sBAlKJ,0BFvBJ,GEgMQ,UAAA,QF3LR,GEgLM,UAAA,QF3KN,GE2KM,UAAA,KFhKN,EACE,WAAA,EACA,cAAA,KCoBF,6BDTA,YAEE,wBAAA,UAAA,OAAA,gBAAA,UAAA,OACA,OAAA,KACA,iCAAA,KAAA,yBAAA,KAMF,QACE,cAAA,KACA,WAAA,OACA,YAAA,QAMF,GCKA,GDHE,aAAA,KCSF,GDNA,GCKA,GDFE,WAAA,EACA,cAAA,KAGF,MCMA,MACA,MAFA,MDDE,cAAA,EAGF,GACE,YAAA,IAKF,GACE,cAAA,MACA,YAAA,EAMF,WACE,OAAA,EAAA,EAAA,KAQF,ECLA,ODOE,YAAA,OAQF,ME4EM,UAAA,OFrEN,KACE,QAAA,KACA,iBAAA,QASF,ICnBA,IDqBE,SAAA,SEwDI,UAAA,MFtDJ,YAAA,EACA,eAAA,SAGF,IAAM,OAAA,OACN,IAAM,IAAA,MAKN,EACE,MAAA,QACA,gBAAA,UAEA,QACE,MAAA,QAWF,2BAAA,iCAEE,MAAA,QACA,gBAAA,KCvBJ,KACA,ID6BA,IC5BA,KDgCE,YAAA,yBEcI,UAAA,IFZJ,UAAA,IACA,aAAA,cAOF,IACE,QAAA,MACA,WAAA,EACA,cAAA,KACA,SAAA,KEAI,UAAA,OFKJ,SELI,UAAA,QFOF,MAAA,QACA,WAAA,OAIJ,KEZM,UAAA,OFcJ,MAAA,QACA,UAAA,WAGA,OACE,MAAA,QAIJ,IACE,QAAA,MAAA,MExBI,UAAA,OF0BJ,MAAA,KACA,iBAAA,QG7SE,cAAA,MHgTF,QACE,QAAA,EE/BE,UAAA,IFiCF,YAAA,IASJ,OACE,OAAA,EAAA,EAAA,KAMF,IChDA,IDkDE,eAAA,OAQF,MACE,aAAA,OACA,gBAAA,SAGF,QACE,YAAA,MACA,eAAA,MACA,MAAA,QACA,WAAA,KAOF,GAEE,WAAA,QACA,WAAA,qBCvDF,MAGA,GAFA,MAGA,GDsDA,MCxDA,GD8DE,aAAA,QACA,aAAA,MACA,aAAA,EAQF,MACE,QAAA,aAMF,OAEE,cAAA,EAQF,iCACE,QAAA,ECrEF,OD0EA,MCxEA,SADA,OAEA,SD4EE,OAAA,EACA,YAAA,QE9HI,UAAA,QFgIJ,YAAA,QAIF,OC3EA,OD6EE,eAAA,KAKF,cACE,OAAA,QAGF,OAGE,UAAA,OAGA,gBACE,QAAA,EAOJ,0CACE,QAAA,KCjFF,cACA,aACA,cDuFA,OAIE,mBAAA,OCvFF,6BACA,4BACA,6BDwFI,sBACE,OAAA,QAON,mBACE,QAAA,EACA,aAAA,KAKF,SACE,OAAA,SAUF,SACE,UAAA,EACA,QAAA,EACA,OAAA,EACA,OAAA,EAQF,OACE,MAAA,KACA,MAAA,KACA,QAAA,EACA,cAAA,MEnNM,UAAA,sBFsNN,YAAA,QExXE,0BFiXJ,OExMQ,UAAA,QFiNN,SACE,MAAA,KC/FJ,kCDsGA,uCCvGA,mCADA,+BAGA,oCAJA,6BAKA,mCD2GE,QAAA,EAGF,4BACE,OAAA,KASF,cACE,eAAA,KACA,mBAAA,UAmBF,4BACE,mBAAA,KAKF,+BACE,QAAA,EAMF,6BACE,KAAA,QADF,uBACE,KAAA,QAMF,6BACE,KAAA,QACA,mBAAA,OAKF,OACE,QAAA,aAKF,OACE,OAAA,EAOF,QACE,QAAA,UACA,OAAA,QAQF,SACE,eAAA,SAQF,SACE,QAAA","sourcesContent":["/*!\n * Bootstrap Reboot v5.1.3 
(https://getbootstrap.com/)\n * Copyright 2011-2021 The Bootstrap Authors\n * Copyright 2011-2021 Twitter, Inc.\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * Forked from Normalize.css, licensed MIT (https://github.com/necolas/normalize.css/blob/master/LICENSE.md)\n */\n\n@import \"functions\";\n@import \"variables\";\n@import \"mixins\";\n@import \"root\";\n@import \"reboot\";\n",":root {\n // Note: Custom variable values only support SassScript inside `#{}`.\n\n // Colors\n //\n // Generate palettes for full colors, grays, and theme colors.\n\n @each $color, $value in $colors {\n --#{$variable-prefix}#{$color}: #{$value};\n }\n\n @each $color, $value in $grays {\n --#{$variable-prefix}gray-#{$color}: #{$value};\n }\n\n @each $color, $value in $theme-colors {\n --#{$variable-prefix}#{$color}: #{$value};\n }\n\n @each $color, $value in $theme-colors-rgb {\n --#{$variable-prefix}#{$color}-rgb: #{$value};\n }\n\n --#{$variable-prefix}white-rgb: #{to-rgb($white)};\n --#{$variable-prefix}black-rgb: #{to-rgb($black)};\n --#{$variable-prefix}body-color-rgb: #{to-rgb($body-color)};\n --#{$variable-prefix}body-bg-rgb: #{to-rgb($body-bg)};\n\n // Fonts\n\n // Note: Use `inspect` for lists so that quoted items keep the quotes.\n // See https://github.com/sass/sass/issues/2383#issuecomment-336349172\n --#{$variable-prefix}font-sans-serif: #{inspect($font-family-sans-serif)};\n --#{$variable-prefix}font-monospace: #{inspect($font-family-monospace)};\n --#{$variable-prefix}gradient: #{$gradient};\n\n // Root and body\n // stylelint-disable custom-property-empty-line-before\n // scss-docs-start root-body-variables\n @if $font-size-root != null {\n --#{$variable-prefix}root-font-size: #{$font-size-root};\n }\n --#{$variable-prefix}body-font-family: #{$font-family-base};\n --#{$variable-prefix}body-font-size: #{$font-size-base};\n --#{$variable-prefix}body-font-weight: #{$font-weight-base};\n --#{$variable-prefix}body-line-height: #{$line-height-base};\n --#{$variable-prefix}body-color: #{$body-color};\n @if $body-text-align != null {\n --#{$variable-prefix}body-text-align: #{$body-text-align};\n }\n --#{$variable-prefix}body-bg: #{$body-bg};\n // scss-docs-end root-body-variables\n // stylelint-enable custom-property-empty-line-before\n}\n","// stylelint-disable declaration-no-important, selector-no-qualifying-type, property-no-vendor-prefix\n\n\n// Reboot\n//\n// Normalization of HTML elements, manually forked from Normalize.css to remove\n// styles targeting irrelevant browsers while applying new styles.\n//\n// Normalize is licensed MIT. https://github.com/necolas/normalize.css\n\n\n// Document\n//\n// Change from `box-sizing: content-box` so that `width` is not affected by `padding` or `border`.\n\n*,\n*::before,\n*::after {\n box-sizing: border-box;\n}\n\n\n// Root\n//\n// Ability to the value of the root font sizes, affecting the value of `rem`.\n// null by default, thus nothing is generated.\n\n:root {\n @if $font-size-root != null {\n font-size: var(--#{$variable-prefix}root-font-size);\n }\n\n @if $enable-smooth-scroll {\n @media (prefers-reduced-motion: no-preference) {\n scroll-behavior: smooth;\n }\n }\n}\n\n\n// Body\n//\n// 1. Remove the margin in all browsers.\n// 2. As a best practice, apply a default `background-color`.\n// 3. Prevent adjustments of font size after orientation changes in iOS.\n// 4. 
Change the default tap highlight to be completely transparent in iOS.\n\n// scss-docs-start reboot-body-rules\nbody {\n margin: 0; // 1\n font-family: var(--#{$variable-prefix}body-font-family);\n @include font-size(var(--#{$variable-prefix}body-font-size));\n font-weight: var(--#{$variable-prefix}body-font-weight);\n line-height: var(--#{$variable-prefix}body-line-height);\n color: var(--#{$variable-prefix}body-color);\n text-align: var(--#{$variable-prefix}body-text-align);\n background-color: var(--#{$variable-prefix}body-bg); // 2\n -webkit-text-size-adjust: 100%; // 3\n -webkit-tap-highlight-color: rgba($black, 0); // 4\n}\n// scss-docs-end reboot-body-rules\n\n\n// Content grouping\n//\n// 1. Reset Firefox's gray color\n// 2. Set correct height and prevent the `size` attribute to make the `hr` look like an input field\n\nhr {\n margin: $hr-margin-y 0;\n color: $hr-color; // 1\n background-color: currentColor;\n border: 0;\n opacity: $hr-opacity;\n}\n\nhr:not([size]) {\n height: $hr-height; // 2\n}\n\n\n// Typography\n//\n// 1. Remove top margins from headings\n// By default, `
<h1>`-`<h6>
` all receive top and bottom margins. We nuke the top\n// margin for easier control within type scales as it avoids margin collapsing.\n\n%heading {\n margin-top: 0; // 1\n margin-bottom: $headings-margin-bottom;\n font-family: $headings-font-family;\n font-style: $headings-font-style;\n font-weight: $headings-font-weight;\n line-height: $headings-line-height;\n color: $headings-color;\n}\n\nh1 {\n @extend %heading;\n @include font-size($h1-font-size);\n}\n\nh2 {\n @extend %heading;\n @include font-size($h2-font-size);\n}\n\nh3 {\n @extend %heading;\n @include font-size($h3-font-size);\n}\n\nh4 {\n @extend %heading;\n @include font-size($h4-font-size);\n}\n\nh5 {\n @extend %heading;\n @include font-size($h5-font-size);\n}\n\nh6 {\n @extend %heading;\n @include font-size($h6-font-size);\n}\n\n\n// Reset margins on paragraphs\n//\n// Similarly, the top margin on `
<p>
`s get reset. However, we also reset the\n// bottom margin to use `rem` units instead of `em`.\n\np {\n margin-top: 0;\n margin-bottom: $paragraph-margin-bottom;\n}\n\n\n// Abbreviations\n//\n// 1. Duplicate behavior to the data-bs-* attribute for our tooltip plugin\n// 2. Add the correct text decoration in Chrome, Edge, Opera, and Safari.\n// 3. Add explicit cursor to indicate changed behavior.\n// 4. Prevent the text-decoration to be skipped.\n\nabbr[title],\nabbr[data-bs-original-title] { // 1\n text-decoration: underline dotted; // 2\n cursor: help; // 3\n text-decoration-skip-ink: none; // 4\n}\n\n\n// Address\n\naddress {\n margin-bottom: 1rem;\n font-style: normal;\n line-height: inherit;\n}\n\n\n// Lists\n\nol,\nul {\n padding-left: 2rem;\n}\n\nol,\nul,\ndl {\n margin-top: 0;\n margin-bottom: 1rem;\n}\n\nol ol,\nul ul,\nol ul,\nul ol {\n margin-bottom: 0;\n}\n\ndt {\n font-weight: $dt-font-weight;\n}\n\n// 1. Undo browser default\n\ndd {\n margin-bottom: .5rem;\n margin-left: 0; // 1\n}\n\n\n// Blockquote\n\nblockquote {\n margin: 0 0 1rem;\n}\n\n\n// Strong\n//\n// Add the correct font weight in Chrome, Edge, and Safari\n\nb,\nstrong {\n font-weight: $font-weight-bolder;\n}\n\n\n// Small\n//\n// Add the correct font size in all browsers\n\nsmall {\n @include font-size($small-font-size);\n}\n\n\n// Mark\n\nmark {\n padding: $mark-padding;\n background-color: $mark-bg;\n}\n\n\n// Sub and Sup\n//\n// Prevent `sub` and `sup` elements from affecting the line height in\n// all browsers.\n\nsub,\nsup {\n position: relative;\n @include font-size($sub-sup-font-size);\n line-height: 0;\n vertical-align: baseline;\n}\n\nsub { bottom: -.25em; }\nsup { top: -.5em; }\n\n\n// Links\n\na {\n color: $link-color;\n text-decoration: $link-decoration;\n\n &:hover {\n color: $link-hover-color;\n text-decoration: $link-hover-decoration;\n }\n}\n\n// And undo these styles for placeholder links/named anchors (without href).\n// It would be more straightforward to just use a[href] in previous block, but that\n// causes specificity issues in many other styles that are too complex to fix.\n// See https://github.com/twbs/bootstrap/issues/19402\n\na:not([href]):not([class]) {\n &,\n &:hover {\n color: inherit;\n text-decoration: none;\n }\n}\n\n\n// Code\n\npre,\ncode,\nkbd,\nsamp {\n font-family: $font-family-code;\n @include font-size(1em); // Correct the odd `em` font sizing in all browsers.\n direction: ltr #{\"/* rtl:ignore */\"};\n unicode-bidi: bidi-override;\n}\n\n// 1. Remove browser default top margin\n// 2. Reset browser default of `1em` to use `rem`s\n// 3. 
Don't allow content to break outside\n\npre {\n display: block;\n margin-top: 0; // 1\n margin-bottom: 1rem; // 2\n overflow: auto; // 3\n @include font-size($code-font-size);\n color: $pre-color;\n\n // Account for some code outputs that place code tags in pre tags\n code {\n @include font-size(inherit);\n color: inherit;\n word-break: normal;\n }\n}\n\ncode {\n @include font-size($code-font-size);\n color: $code-color;\n word-wrap: break-word;\n\n // Streamline the style when inside anchors to avoid broken underline and more\n a > & {\n color: inherit;\n }\n}\n\nkbd {\n padding: $kbd-padding-y $kbd-padding-x;\n @include font-size($kbd-font-size);\n color: $kbd-color;\n background-color: $kbd-bg;\n @include border-radius($border-radius-sm);\n\n kbd {\n padding: 0;\n @include font-size(1em);\n font-weight: $nested-kbd-font-weight;\n }\n}\n\n\n// Figures\n//\n// Apply a consistent margin strategy (matches our type styles).\n\nfigure {\n margin: 0 0 1rem;\n}\n\n\n// Images and content\n\nimg,\nsvg {\n vertical-align: middle;\n}\n\n\n// Tables\n//\n// Prevent double borders\n\ntable {\n caption-side: bottom;\n border-collapse: collapse;\n}\n\ncaption {\n padding-top: $table-cell-padding-y;\n padding-bottom: $table-cell-padding-y;\n color: $table-caption-color;\n text-align: left;\n}\n\n// 1. Removes font-weight bold by inheriting\n// 2. Matches default `` alignment by inheriting `text-align`.\n// 3. Fix alignment for Safari\n\nth {\n font-weight: $table-th-font-weight; // 1\n text-align: inherit; // 2\n text-align: -webkit-match-parent; // 3\n}\n\nthead,\ntbody,\ntfoot,\ntr,\ntd,\nth {\n border-color: inherit;\n border-style: solid;\n border-width: 0;\n}\n\n\n// Forms\n//\n// 1. Allow labels to use `margin` for spacing.\n\nlabel {\n display: inline-block; // 1\n}\n\n// Remove the default `border-radius` that macOS Chrome adds.\n// See https://github.com/twbs/bootstrap/issues/24093\n\nbutton {\n // stylelint-disable-next-line property-disallowed-list\n border-radius: 0;\n}\n\n// Explicitly remove focus outline in Chromium when it shouldn't be\n// visible (e.g. as result of mouse click or touch tap). It already\n// should be doing this automatically, but seems to currently be\n// confused and applies its very visible two-tone outline anyway.\n\nbutton:focus:not(:focus-visible) {\n outline: 0;\n}\n\n// 1. Remove the margin in Firefox and Safari\n\ninput,\nbutton,\nselect,\noptgroup,\ntextarea {\n margin: 0; // 1\n font-family: inherit;\n @include font-size(inherit);\n line-height: inherit;\n}\n\n// Remove the inheritance of text transform in Firefox\nbutton,\nselect {\n text-transform: none;\n}\n// Set the cursor for non-` 374 | 375 |

381 | 388 | 395 |
-------------------------------------------------------------------------------- /localGPT_UI.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import subprocess 3 | import streamlit as st 4 | from run_localGPT import load_model 5 | from langchain.vectorstores import Chroma 6 | from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME 7 | from langchain.embeddings import HuggingFaceInstructEmbeddings 8 | from langchain.chains import RetrievalQA 9 | from streamlit_extras.add_vertical_space import add_vertical_space 10 | from langchain.prompts import PromptTemplate 11 | from langchain.memory import ConversationBufferMemory 12 | 13 | 14 | def model_memory(): 15 | # Adding history to the model. 16 | template = """Use the following pieces of context to answer the question at the end. If you don't know the answer,\ 17 | just say that you don't know, don't try to make up an answer. 18 | 19 | {context} 20 | 21 | {history} 22 | Question: {question} 23 | Helpful Answer:""" 24 | 25 | prompt = PromptTemplate(input_variables=["history", "context", "question"], template=template) 26 | memory = ConversationBufferMemory(input_key="question", memory_key="history") 27 | 28 | return prompt, memory 29 | 30 | 31 | # Sidebar contents 32 | with st.sidebar: 33 | st.title("🤗💬 Converse with your Data") 34 | st.markdown( 35 | """ 36 | ## About 37 | This app is an LLM-powered chatbot built using: 38 | - [Streamlit](https://streamlit.io/) 39 | - [LangChain](https://python.langchain.com/) 40 | - [LocalGPT](https://github.com/PromtEngineer/localGPT) 41 | 42 | """ 43 | ) 44 | add_vertical_space(5) 45 | st.write("Made with ❤️ by [Prompt Engineer](https://youtube.com/@engineerprompt)") 46 | 47 | 48 | if torch.backends.mps.is_available(): 49 | DEVICE_TYPE = "mps" 50 | elif torch.cuda.is_available(): 51 | DEVICE_TYPE = "cuda" 52 | else: 53 | DEVICE_TYPE = "cpu" 54 | 55 | 56 | # if "result" not in st.session_state: 57 | # # Run the document ingestion process.
58 | # run_langest_commands = ["python", "ingest.py"] 59 | # run_langest_commands.append("--device_type") 60 | # run_langest_commands.append(DEVICE_TYPE) 61 | 62 | # result = subprocess.run(run_langest_commands, capture_output=True) 63 | # st.session_state.result = result 64 | 65 | # Define the retriever 66 | # load the vectorstore 67 | if "EMBEDDINGS" not in st.session_state: 68 | EMBEDDINGS = HuggingFaceInstructEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": DEVICE_TYPE}) 69 | st.session_state.EMBEDDINGS = EMBEDDINGS 70 | 71 | if "DB" not in st.session_state: 72 | DB = Chroma( 73 | persist_directory=PERSIST_DIRECTORY, 74 | embedding_function=st.session_state.EMBEDDINGS, 75 | client_settings=CHROMA_SETTINGS, 76 | ) 77 | st.session_state.DB = DB 78 | 79 | if "RETRIEVER" not in st.session_state: 80 | RETRIEVER = DB.as_retriever() 81 | st.session_state.RETRIEVER = RETRIEVER 82 | 83 | if "LLM" not in st.session_state: 84 | LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME) 85 | st.session_state["LLM"] = LLM 86 | 87 | 88 | if "QA" not in st.session_state: 89 | prompt, memory = model_memory() 90 | 91 | QA = RetrievalQA.from_chain_type( 92 | llm=LLM, 93 | chain_type="stuff", 94 | retriever=RETRIEVER, 95 | return_source_documents=True, 96 | chain_type_kwargs={"prompt": prompt, "memory": memory}, 97 | ) 98 | st.session_state["QA"] = QA 99 | 100 | st.title("LocalGPT App 💬") 101 | # Create a text input box for the user 102 | prompt = st.text_input("Input your prompt here") 103 | # while True: 104 | 105 | # If the user hits enter 106 | if prompt: 107 | # Then pass the prompt to the LLM 108 | response = st.session_state["QA"](prompt) 109 | answer, docs = response["result"], response["source_documents"] 110 | # ...and write it out to the screen 111 | st.write(answer) 112 | 113 | # With a streamlit expander 114 | with st.expander("Document Similarity Search"): 115 | # Find the relevant pages 116 | search = st.session_state.DB.similarity_search_with_score(prompt) 117 | # Write out the retrieved documents 118 | for i, doc in enumerate(search): 119 | # print(doc) 120 | st.write(f"Source Document # {i+1} : {doc[0].metadata['source'].split('/')[-1]}") 121 | st.write(doc[0].page_content) 122 | st.write("--------------------------------") 123 | -------------------------------------------------------------------------------- /prompt_template_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file implements prompt templates for llama-based models. 3 | Modify the prompt template based on the model you select. 4 | This seems to have a significant impact on the output of the LLM. 5 | """ 6 | 7 | from langchain.memory import ConversationBufferMemory 8 | from langchain.prompts import PromptTemplate 9 | 10 | # this is specific to Llama-2. 11 | 12 | system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions. 13 | Read the given context before answering questions and think step by step. If you cannot answer a user question based on 14 | the provided context, inform the user. Do not use any other information for answering the user.
Provide a detailed answer to the question.""" 15 | 16 | 17 | def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, history=False): 18 | if promptTemplate_type == "llama": 19 | B_INST, E_INST = "[INST]", "[/INST]" 20 | B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n" 21 | SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS 22 | if history: 23 | instruction = """ 24 | Context: {history} \n {context} 25 | User: {question}""" 26 | 27 | prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST 28 | prompt = PromptTemplate(input_variables=["history", "context", "question"], template=prompt_template) 29 | else: 30 | instruction = """ 31 | Context: {context} 32 | User: {question}""" 33 | 34 | prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST 35 | prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template) 36 | 37 | elif promptTemplate_type == "llama3": 38 | 39 | B_INST, E_INST = "<|start_header_id|>user<|end_header_id|>", "<|eot_id|>" 40 | B_SYS, E_SYS = "<|begin_of_text|><|start_header_id|>system<|end_header_id|> ", "<|eot_id|>" 41 | ASSISTANT_INST = "<|start_header_id|>assistant<|end_header_id|>" 42 | SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS 43 | if history: 44 | instruction = """ 45 | Context: {history} \n {context} 46 | User: {question}""" 47 | 48 | prompt_template = SYSTEM_PROMPT + B_INST + instruction + ASSISTANT_INST 49 | prompt = PromptTemplate(input_variables=["history", "context", "question"], template=prompt_template) 50 | else: 51 | instruction = """ 52 | Context: {context} 53 | User: {question}""" 54 | 55 | prompt_template = SYSTEM_PROMPT + B_INST + instruction + ASSISTANT_INST 56 | prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template) 57 | 58 | elif promptTemplate_type == "mistral": 59 | B_INST, E_INST = "[INST] ", " [/INST]" 60 | if history: 61 | prompt_template = ( 62 | B_INST 63 | + system_prompt 64 | + """ 65 | 66 | Context: {history} \n {context} 67 | User: {question}""" 68 | + E_INST 69 | ) 70 | prompt = PromptTemplate(input_variables=["history", "context", "question"], template=prompt_template) 71 | else: 72 | prompt_template = ( 73 | B_INST 74 | + system_prompt 75 | + """ 76 | 77 | Context: {context} 78 | User: {question}""" 79 | + E_INST 80 | ) 81 | prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template) 82 | else: 83 | # change this based on the model you have selected.
84 | if history: 85 | prompt_template = ( 86 | system_prompt 87 | + """ 88 | 89 | Context: {history} \n {context} 90 | User: {question} 91 | Answer:""" 92 | ) 93 | prompt = PromptTemplate(input_variables=["history", "context", "question"], template=prompt_template) 94 | else: 95 | prompt_template = ( 96 | system_prompt 97 | + """ 98 | 99 | Context: {context} 100 | User: {question} 101 | Answer:""" 102 | ) 103 | prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template) 104 | 105 | memory = ConversationBufferMemory(input_key="question", memory_key="history") 106 | 107 | print(f"Here is the prompt used: {prompt}") 108 | 109 | return ( 110 | prompt, 111 | memory, 112 | ) 113 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # ==== black ==== 2 | [tool.black] 3 | line-length = 119 4 | target-version = ['py311'] 5 | 6 | 7 | # ==== isort ==== 8 | [tool.isort] 9 | profile = "black" 10 | line_length = 119 11 | known_first_party = [ 12 | "tests", 13 | "scripts", 14 | "hooks", 15 | ] 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Natural Language Processing 2 | langchain==0.0.267 3 | chromadb==0.4.6 4 | pdfminer.six==20221105 5 | InstructorEmbedding 6 | sentence-transformers==2.2.2 7 | faiss-cpu 8 | huggingface_hub==0.25.0 9 | transformers 10 | autoawq; sys_platform != 'darwin' 11 | protobuf==3.20.2; sys_platform != 'darwin' 12 | protobuf==3.20.2; sys_platform == 'darwin' and platform_machine != 'arm64' 13 | protobuf==3.20.3; sys_platform == 'darwin' and platform_machine == 'arm64' 14 | auto-gptq==0.6.0; sys_platform != 'darwin' 15 | docx2txt 16 | unstructured 17 | unstructured[pdf] 18 | 19 | # Utilities 20 | urllib3==1.26.6 21 | accelerate 22 | bitsandbytes ; sys_platform != 'win32' 23 | bitsandbytes-windows ; sys_platform == 'win32' 24 | click 25 | flask 26 | requests 27 | 28 | # Streamlit related 29 | streamlit 30 | Streamlit-extras 31 | 32 | # Excel File Manipulation 33 | openpyxl 34 | -------------------------------------------------------------------------------- /run_localGPT.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import click 4 | import torch 5 | import utils 6 | from langchain.chains import RetrievalQA 7 | from langchain.embeddings import HuggingFaceInstructEmbeddings 8 | from langchain.llms import HuggingFacePipeline 9 | from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler # for streaming response 10 | from langchain.callbacks.manager import CallbackManager 11 | 12 | callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]) 13 | 14 | from prompt_template_utils import get_prompt_template 15 | from utils import get_embeddings 16 | 17 | # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler 18 | from langchain.vectorstores import Chroma 19 | from transformers import ( 20 | GenerationConfig, 21 | pipeline, 22 | ) 23 | 24 | from load_models import ( 25 | load_quantized_model_awq, 26 | load_quantized_model_gguf_ggml, 27 | load_quantized_model_qptq, 28 | load_full_model, 29 | ) 30 | 31 | from constants import ( 32 | EMBEDDING_MODEL_NAME, 33 | PERSIST_DIRECTORY, 34 | MODEL_ID, 35 | MODEL_BASENAME, 36 | MAX_NEW_TOKENS, 37 | MODELS_PATH, 38 | 
CHROMA_SETTINGS, 39 | ) 40 | 41 | 42 | def load_model(device_type, model_id, model_basename=None, LOGGING=logging): 43 | """ 44 | Select a model for text generation using the HuggingFace library. 45 | If you are running this for the first time, it will download a model for you. 46 | subsequent runs will use the model from the disk. 47 | 48 | Args: 49 | device_type (str): Type of device to use, e.g., "cuda" for GPU or "cpu" for CPU. 50 | model_id (str): Identifier of the model to load from HuggingFace's model hub. 51 | model_basename (str, optional): Basename of the model if using quantized models. 52 | Defaults to None. 53 | 54 | Returns: 55 | HuggingFacePipeline: A pipeline object for text generation using the loaded model. 56 | 57 | Raises: 58 | ValueError: If an unsupported model or device type is provided. 59 | """ 60 | logging.info(f"Loading Model: {model_id}, on: {device_type}") 61 | logging.info("This action can take a few minutes!") 62 | 63 | if model_basename is not None: 64 | if ".gguf" in model_basename.lower(): 65 | llm = load_quantized_model_gguf_ggml(model_id, model_basename, device_type, LOGGING) 66 | return llm 67 | elif ".ggml" in model_basename.lower(): 68 | model, tokenizer = load_quantized_model_gguf_ggml(model_id, model_basename, device_type, LOGGING) 69 | elif ".awq" in model_basename.lower(): 70 | model, tokenizer = load_quantized_model_awq(model_id, LOGGING) 71 | else: 72 | model, tokenizer = load_quantized_model_qptq(model_id, model_basename, device_type, LOGGING) 73 | else: 74 | model, tokenizer = load_full_model(model_id, model_basename, device_type, LOGGING) 75 | 76 | # Load configuration from the model to avoid warnings 77 | generation_config = GenerationConfig.from_pretrained(model_id) 78 | # see here for details: 79 | # https://huggingface.co/docs/transformers/ 80 | # main_classes/text_generation#transformers.GenerationConfig.from_pretrained.returns 81 | 82 | # Create a pipeline for text generation 83 | if device_type == "hpu": 84 | from gaudi_utils.pipeline import GaudiTextGenerationPipeline 85 | 86 | pipe = GaudiTextGenerationPipeline( 87 | model_name_or_path=model_id, 88 | max_new_tokens=1000, 89 | temperature=0.2, 90 | top_p=0.95, 91 | repetition_penalty=1.15, 92 | do_sample=True, 93 | max_padding_length=5000, 94 | ) 95 | pipe.compile_graph() 96 | else: 97 | pipe = pipeline( 98 | "text-generation", 99 | model=model, 100 | tokenizer=tokenizer, 101 | max_length=MAX_NEW_TOKENS, 102 | temperature=0.2, 103 | # top_p=0.95, 104 | repetition_penalty=1.15, 105 | generation_config=generation_config, 106 | ) 107 | 108 | local_llm = HuggingFacePipeline(pipeline=pipe) 109 | logging.info("Local LLM Loaded") 110 | 111 | return local_llm 112 | 113 | 114 | def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"): 115 | """ 116 | Initializes and returns a retrieval-based Question Answering (QA) pipeline. 117 | 118 | This function sets up a QA system that retrieves relevant information using embeddings 119 | from the HuggingFace library. It then answers questions based on the retrieved information. 120 | 121 | Parameters: 122 | - device_type (str): Specifies the type of device where the model will run, e.g., 'cpu', 'cuda', etc. 123 | - use_history (bool): Flag to determine whether to use chat history or not. 124 | 125 | Returns: 126 | - RetrievalQA: An initialized retrieval-based QA system. 127 | 128 | Notes: 129 | - The function uses embeddings from the HuggingFace library, either instruction-based or regular. 
130 | - The Chroma class is used to load a vector store containing pre-computed embeddings. 131 | - The retriever fetches relevant documents or data based on a query. 132 | - The prompt and memory, obtained from the `get_prompt_template` function, might be used in the QA system. 133 | - The model is loaded onto the specified device using its ID and basename. 134 | - The QA system retrieves relevant documents using the retriever and then answers questions based on those documents. 135 | """ 136 | 137 | """ 138 | (1) Chooses an appropriate langchain library based on the embedding model name. Matching code is contained within ingest.py. 139 | 140 | (2) Provides additional arguments for instructor and BGE models to improve results, pursuant to the instructions contained on 141 | their respective huggingface repository, project page or github repository. 142 | """ 143 | if device_type == "hpu": 144 | from gaudi_utils.embeddings import load_embeddings 145 | 146 | embeddings = load_embeddings() 147 | else: 148 | embeddings = get_embeddings(device_type) 149 | 150 | logging.info(f"Loaded embeddings from {EMBEDDING_MODEL_NAME}") 151 | 152 | # load the vectorstore 153 | db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings, client_settings=CHROMA_SETTINGS) 154 | retriever = db.as_retriever() 155 | 156 | # get the prompt template and memory if set by the user. 157 | prompt, memory = get_prompt_template(promptTemplate_type=promptTemplate_type, history=use_history) 158 | 159 | # load the llm pipeline 160 | llm = load_model(device_type, model_id=MODEL_ID, model_basename=MODEL_BASENAME, LOGGING=logging) 161 | 162 | if use_history: 163 | qa = RetrievalQA.from_chain_type( 164 | llm=llm, 165 | chain_type="stuff", # try other chain types as well: refine, map_reduce, map_rerank 166 | retriever=retriever, 167 | return_source_documents=True, # verbose=True, 168 | callbacks=callback_manager, 169 | chain_type_kwargs={"prompt": prompt, "memory": memory}, 170 | ) 171 | else: 172 | qa = RetrievalQA.from_chain_type( 173 | llm=llm, 174 | chain_type="stuff", # try other chain types as well: refine, map_reduce, map_rerank 175 | retriever=retriever, 176 | return_source_documents=True, # verbose=True, 177 | callbacks=callback_manager, 178 | chain_type_kwargs={ 179 | "prompt": prompt, 180 | }, 181 | ) 182 | 183 | return qa 184 | 185 | 186 | # choose the device type to run on as well as whether to show source documents. 187 | @click.command() 188 | @click.option( 189 | "--device_type", 190 | default="cuda" if torch.cuda.is_available() else "cpu", 191 | type=click.Choice( 192 | [ 193 | "cpu", 194 | "cuda", 195 | "ipu", 196 | "xpu", 197 | "mkldnn", 198 | "opengl", 199 | "opencl", 200 | "ideep", 201 | "hip", 202 | "ve", 203 | "fpga", 204 | "ort", 205 | "xla", 206 | "lazy", 207 | "vulkan", 208 | "mps", 209 | "meta", 210 | "hpu", 211 | "mtia", 212 | ], 213 | ), 214 | help="Device to run on.
(Default is cuda)", 215 | ) 216 | @click.option( 217 | "--show_sources", 218 | "-s", 219 | is_flag=True, 220 | help="Show sources along with answers (Default is False)", 221 | ) 222 | @click.option( 223 | "--use_history", 224 | "-h", 225 | is_flag=True, 226 | help="Use history (Default is False)", 227 | ) 228 | @click.option( 229 | "--model_type", 230 | default="llama3", 231 | type=click.Choice( 232 | ["llama3", "llama", "mistral", "non_llama"], 233 | ), 234 | help="model type, llama3, llama, mistral or non_llama", 235 | ) 236 | @click.option( 237 | "--save_qa", 238 | is_flag=True, 239 | help="whether to save Q&A pairs to a CSV file (Default is False)", 240 | ) 241 | def main(device_type, show_sources, use_history, model_type, save_qa): 242 | """ 243 | Implements the main information retrieval task for a localGPT. 244 | 245 | This function sets up the QA system by loading the necessary embeddings, vectorstore, and LLM model. 246 | It then enters an interactive loop where the user can input queries and receive answers. Optionally, 247 | the source documents used to derive the answers can also be displayed. 248 | 249 | Parameters: 250 | - device_type (str): Specifies the type of device where the model will run, e.g., 'cpu', 'mps', 'cuda', etc. 251 | - show_sources (bool): Flag to determine whether to display the source documents used for answering. 252 | - use_history (bool): Flag to determine whether to use chat history or not. 253 | 254 | Notes: 255 | - Logging information includes the device type, whether source documents are displayed, and the use of history. 256 | - If the models directory does not exist, it creates a new one to store models. 257 | - The user can exit the interactive loop by entering "exit". 258 | - The source documents are displayed if the show_sources flag is set to True. 259 | 260 | """ 261 | 262 | logging.info(f"Running on: {device_type}") 263 | logging.info(f"Display Source Documents set to: {show_sources}") 264 | logging.info(f"Use history set to: {use_history}") 265 | 266 | # check if models directory do not exist, create a new one and store models here. 267 | if not os.path.exists(MODELS_PATH): 268 | os.mkdir(MODELS_PATH) 269 | 270 | qa = retrieval_qa_pipline(device_type, use_history, promptTemplate_type=model_type) 271 | # Interactive questions and answers 272 | while True: 273 | query = input("\nEnter a query: ") 274 | if query == "exit": 275 | break 276 | # Get the answer from the chain 277 | res = qa(query) 278 | answer, docs = res["result"], res["source_documents"] 279 | 280 | # Print the result 281 | print("\n\n> Question:") 282 | print(query) 283 | print("\n> Answer:") 284 | print(answer) 285 | 286 | if show_sources: # this is a flag that you can set to disable showing answers. 
287 | # # Print the relevant sources used for the answer 288 | print("----------------------------------SOURCE DOCUMENTS---------------------------") 289 | for document in docs: 290 | print("\n> " + document.metadata["source"] + ":") 291 | print(document.page_content) 292 | print("----------------------------------SOURCE DOCUMENTS---------------------------") 293 | 294 | # Log the Q&A to CSV only if save_qa is True 295 | if save_qa: 296 | utils.log_to_csv(query, answer) 297 | 298 | 299 | if __name__ == "__main__": 300 | logging.basicConfig( 301 | format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", level=logging.INFO 302 | ) 303 | main() 304 | -------------------------------------------------------------------------------- /run_localGPT_API.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import shutil 4 | import subprocess 5 | import argparse 6 | 7 | import torch 8 | from flask import Flask, jsonify, request 9 | from langchain.chains import RetrievalQA 10 | from langchain.embeddings import HuggingFaceInstructEmbeddings 11 | 12 | # from langchain.embeddings import HuggingFaceEmbeddings 13 | from run_localGPT import load_model 14 | from prompt_template_utils import get_prompt_template 15 | 16 | # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler 17 | from langchain.vectorstores import Chroma 18 | from werkzeug.utils import secure_filename 19 | 20 | from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME 21 | 22 | # API queue addition 23 | from threading import Lock 24 | 25 | request_lock = Lock() 26 | 27 | 28 | if torch.backends.mps.is_available(): 29 | DEVICE_TYPE = "mps" 30 | elif torch.cuda.is_available(): 31 | DEVICE_TYPE = "cuda" 32 | else: 33 | DEVICE_TYPE = "cpu" 34 | 35 | SHOW_SOURCES = True 36 | logging.info(f"Running on: {DEVICE_TYPE}") 37 | logging.info(f"Display Source Documents set to: {SHOW_SOURCES}") 38 | 39 | EMBEDDINGS = HuggingFaceInstructEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": DEVICE_TYPE}) 40 | 41 | # uncomment the following line if you used HuggingFaceEmbeddings in the ingest.py 42 | # EMBEDDINGS = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME) 43 | # if os.path.exists(PERSIST_DIRECTORY): 44 | # try: 45 | # shutil.rmtree(PERSIST_DIRECTORY) 46 | # except OSError as e: 47 | # print(f"Error: {e.filename} - {e.strerror}.") 48 | # else: 49 | # print("The directory does not exist") 50 | 51 | # run_langest_commands = ["python", "ingest.py"] 52 | # if DEVICE_TYPE == "cpu": 53 | # run_langest_commands.append("--device_type") 54 | # run_langest_commands.append(DEVICE_TYPE) 55 | 56 | # result = subprocess.run(run_langest_commands, capture_output=True) 57 | # if result.returncode != 0: 58 | # raise FileNotFoundError( 59 | # "No files were found inside SOURCE_DOCUMENTS, please put a starter file inside before starting the API!" 
60 | # ) 61 | 62 | # load the vectorstore 63 | DB = Chroma( 64 | persist_directory=PERSIST_DIRECTORY, 65 | embedding_function=EMBEDDINGS, 66 | client_settings=CHROMA_SETTINGS, 67 | ) 68 | 69 | RETRIEVER = DB.as_retriever() 70 | 71 | LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME) 72 | prompt, memory = get_prompt_template(promptTemplate_type="llama", history=False) 73 | 74 | QA = RetrievalQA.from_chain_type( 75 | llm=LLM, 76 | chain_type="stuff", 77 | retriever=RETRIEVER, 78 | return_source_documents=SHOW_SOURCES, 79 | chain_type_kwargs={ 80 | "prompt": prompt, 81 | }, 82 | ) 83 | 84 | app = Flask(__name__) 85 | 86 | 87 | @app.route("/api/delete_source", methods=["GET"]) 88 | def delete_source_route(): 89 | folder_name = "SOURCE_DOCUMENTS" 90 | 91 | if os.path.exists(folder_name): 92 | shutil.rmtree(folder_name) 93 | 94 | os.makedirs(folder_name) 95 | 96 | return jsonify({"message": f"Folder '{folder_name}' successfully deleted and recreated."}) 97 | 98 | 99 | @app.route("/api/save_document", methods=["GET", "POST"]) 100 | def save_document_route(): 101 | if "document" not in request.files: 102 | return "No document part", 400 103 | file = request.files["document"] 104 | if file.filename == "": 105 | return "No selected file", 400 106 | if file: 107 | filename = secure_filename(file.filename) 108 | folder_path = "SOURCE_DOCUMENTS" 109 | if not os.path.exists(folder_path): 110 | os.makedirs(folder_path) 111 | file_path = os.path.join(folder_path, filename) 112 | file.save(file_path) 113 | return "File saved successfully", 200 114 | 115 | 116 | @app.route("/api/run_ingest", methods=["GET"]) 117 | def run_ingest_route(): 118 | global DB 119 | global RETRIEVER 120 | global QA 121 | try: 122 | if os.path.exists(PERSIST_DIRECTORY): 123 | try: 124 | shutil.rmtree(PERSIST_DIRECTORY) 125 | except OSError as e: 126 | print(f"Error: {e.filename} - {e.strerror}.") 127 | else: 128 | print("The directory does not exist") 129 | 130 | run_langest_commands = ["python", "ingest.py"] 131 | if DEVICE_TYPE == "cpu": 132 | run_langest_commands.append("--device_type") 133 | run_langest_commands.append(DEVICE_TYPE) 134 | 135 | result = subprocess.run(run_langest_commands, capture_output=True) 136 | if result.returncode != 0: 137 | return "Script execution failed: {}".format(result.stderr.decode("utf-8")), 500 138 | # load the vectorstore 139 | DB = Chroma( 140 | persist_directory=PERSIST_DIRECTORY, 141 | embedding_function=EMBEDDINGS, 142 | client_settings=CHROMA_SETTINGS, 143 | ) 144 | RETRIEVER = DB.as_retriever() 145 | prompt, memory = get_prompt_template(promptTemplate_type="llama", history=False) 146 | 147 | QA = RetrievalQA.from_chain_type( 148 | llm=LLM, 149 | chain_type="stuff", 150 | retriever=RETRIEVER, 151 | return_source_documents=SHOW_SOURCES, 152 | chain_type_kwargs={ 153 | "prompt": prompt, 154 | }, 155 | ) 156 | return "Script executed successfully: {}".format(result.stdout.decode("utf-8")), 200 157 | except Exception as e: 158 | return f"Error occurred: {str(e)}", 500 159 | 160 | 161 | @app.route("/api/prompt_route", methods=["GET", "POST"]) 162 | def prompt_route(): 163 | global QA 164 | global request_lock # Make sure to use the global lock instance 165 | user_prompt = request.form.get("user_prompt") 166 | if user_prompt: 167 | # Acquire the lock before processing the prompt 168 | with request_lock: 169 | # print(f'User Prompt: {user_prompt}') 170 | # Get the answer from the chain 171 | res = QA(user_prompt) 172 | answer, docs = res["result"], 
res["source_documents"] 173 | 174 | prompt_response_dict = { 175 | "Prompt": user_prompt, 176 | "Answer": answer, 177 | } 178 | 179 | prompt_response_dict["Sources"] = [] 180 | for document in docs: 181 | prompt_response_dict["Sources"].append( 182 | (os.path.basename(str(document.metadata["source"])), str(document.page_content)) 183 | ) 184 | 185 | return jsonify(prompt_response_dict), 200 186 | else: 187 | return "No user prompt received", 400 188 | 189 | 190 | if __name__ == "__main__": 191 | parser = argparse.ArgumentParser() 192 | parser.add_argument("--port", type=int, default=5110, help="Port to run the API on. Defaults to 5110.") 193 | parser.add_argument( 194 | "--host", 195 | type=str, 196 | default="127.0.0.1", 197 | help="Host to run the UI on. Defaults to 127.0.0.1. " 198 | "Set to 0.0.0.0 to make the UI externally " 199 | "accessible from other devices.", 200 | ) 201 | args = parser.parse_args() 202 | 203 | logging.basicConfig( 204 | format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", level=logging.INFO 205 | ) 206 | app.run(debug=False, host=args.host, port=args.port) 207 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | from datetime import datetime 4 | from constants import EMBEDDING_MODEL_NAME 5 | from langchain.embeddings import HuggingFaceInstructEmbeddings 6 | from langchain.embeddings import HuggingFaceBgeEmbeddings 7 | from langchain.embeddings import HuggingFaceEmbeddings 8 | 9 | 10 | def log_to_csv(question, answer): 11 | 12 | log_dir, log_file = "local_chat_history", "qa_log.csv" 13 | # Ensure log directory exists, create if not 14 | if not os.path.exists(log_dir): 15 | os.makedirs(log_dir) 16 | 17 | # Construct the full file path 18 | log_path = os.path.join(log_dir, log_file) 19 | 20 | # Check if file exists, if not create and write headers 21 | if not os.path.isfile(log_path): 22 | with open(log_path, mode="w", newline="", encoding="utf-8") as file: 23 | writer = csv.writer(file) 24 | writer.writerow(["timestamp", "question", "answer"]) 25 | 26 | # Append the log entry 27 | with open(log_path, mode="a", newline="", encoding="utf-8") as file: 28 | writer = csv.writer(file) 29 | timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 30 | writer.writerow([timestamp, question, answer]) 31 | 32 | 33 | def get_embeddings(device_type="cuda"): 34 | if "instructor" in EMBEDDING_MODEL_NAME: 35 | return HuggingFaceInstructEmbeddings( 36 | model_name=EMBEDDING_MODEL_NAME, 37 | model_kwargs={"device": device_type}, 38 | embed_instruction="Represent the document for retrieval:", 39 | query_instruction="Represent the question for retrieving supporting documents:", 40 | ) 41 | 42 | elif "bge" in EMBEDDING_MODEL_NAME: 43 | return HuggingFaceBgeEmbeddings( 44 | model_name=EMBEDDING_MODEL_NAME, 45 | model_kwargs={"device": device_type}, 46 | query_instruction="Represent this sentence for searching relevant passages:", 47 | ) 48 | 49 | else: 50 | return HuggingFaceEmbeddings( 51 | model_name=EMBEDDING_MODEL_NAME, 52 | model_kwargs={"device": device_type}, 53 | ) 54 | --------------------------------------------------------------------------------