├── .env ├── .github └── workflows │ ├── codeql.yml │ ├── docker-image.yml │ └── python-package-conda.yml ├── .gitignore ├── .python-version ├── .vscode ├── launch.json └── settings.json ├── Dockerfile ├── README.md ├── add_mount.sh ├── database_functions.py ├── embeddings_data_models.py ├── end_to_end_tests.py ├── environment.yml ├── grammar_builder.py ├── grammar_files ├── accept_or_reject.gbnf ├── investor_sentiment_json.gbnf ├── json.gbnf └── list.gbnf ├── image_files ├── llama_knife_sticker.webp ├── llama_knife_sticker2.jpg ├── swiss_army_llama__swagger_screenshot.png ├── swiss_army_llama__swagger_screenshot_running.png └── swiss_army_llama_logo.webp ├── install_swiss_army_llama.sh ├── log_viewer_functions.py ├── logger_config.py ├── misc_utility_functions.py ├── model_urls.json ├── models └── download.lock ├── pyproject.toml ├── ramdisk_functions.py ├── requirements.txt ├── sample_input_files_for_end_to_end_tests ├── Don_King_if_he_lived_in_the_tiny_island_nation_known_as_Japan.mp3 ├── bh-us-03-sassaman-conference-slides.pdf ├── sunset.jpg └── tale_two_cities_first_3_chapters.txt ├── sentiment_score_generation.py ├── service_functions.py ├── setup_dockerized_app_on_fresh_machine.sh ├── shared_resources.py ├── swiss_army_llama.py ├── tests ├── conftest.py ├── swiss_army_llama │ ├── test_audio_transcription_functions.py │ ├── test_build_faiss_indexes.py │ ├── test_database_operations.py │ ├── test_ramdisk_management.py │ ├── test_service_functions.py │ └── test_views.py ├── test_log_viewer_functions.py └── test_sentiment_score_generation.py └── uvicorn_config.py /.env: -------------------------------------------------------------------------------- 1 | USE_SECURITY_TOKEN=1 2 | USE_PARALLEL_INFERENCE_QUEUE=1 3 | MAX_CONCURRENT_PARALLEL_INFERENCE_TASKS=50 4 | DEFAULT_MODEL_NAME=Meta-Llama-3-8B-Instruct.Q3_K_S 5 | DEFAULT_EMBEDDING_MODEL_NAME=nomic-embed-text-v1.5.Q6_K 6 | DEFAULT_MULTI_MODAL_MODEL_NAME=llava-llama-3-8b-v1_1-int4 7 | USE_FLASH_ATTENTION=1 8 | LLM_CONTEXT_SIZE_IN_TOKENS=2048 9 | TEXT_COMPLETION_CONTEXT_SIZE_IN_TOKENS=32000 10 | DEFAULT_MAX_COMPLETION_TOKENS=1000 11 | DEFAULT_NUMBER_OF_COMPLETIONS_TO_GENERATE =1 12 | DEFAULT_COMPLETION_TEMPERATURE=0.7 13 | DEFAULT_EMBEDDING_POOLING_METHOD=mean 14 | LLAMA_EMBEDDING_SERVER_LISTEN_PORT=8089 15 | UVICORN_NUMBER_OF_WORKERS=1 16 | MINIMUM_STRING_LENGTH_FOR_DOCUMENT_EMBEDDING=15 17 | MAX_RETRIES=10 18 | DB_WRITE_BATCH_SIZE=25 19 | RETRY_DELAY_BASE_SECONDS=1 20 | JITTER_FACTOR=0.1 21 | USE_RAMDISK=0 22 | USE_VERBOSE=0 23 | USE_RESOURCE_MONITORING=1 24 | RAMDISK_PATH = "/mnt/ramdisk" 25 | RAMDISK_SIZE_IN_GB=50 26 | USE_AUTOMATIC_PURGING_OF_EXPIRED_RECORDS=0 27 | TIME_IN_DAYS_BEFORE_RECORDS_ARE_PURGED=2 28 | MAX_THOUSANDS_OF_WORDs_FOR_DOCUMENT_EMBEDDING=600 -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "main" ] 20 | schedule: 21 | - cron: '38 0 * * 4' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | # Runner size impacts CodeQL analysis time. To learn more, please see: 27 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 28 | # - https://gh.io/supported-runners-and-hardware-resources 29 | # - https://gh.io/using-larger-runners 30 | # Consider using larger runners for possible analysis time improvements. 31 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 32 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 33 | permissions: 34 | actions: read 35 | contents: read 36 | security-events: write 37 | 38 | strategy: 39 | fail-fast: false 40 | matrix: 41 | language: [ 'python' ] 42 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] 43 | # Use only 'java' to analyze code written in Java, Kotlin or both 44 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both 45 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 46 | 47 | steps: 48 | - name: Checkout repository 49 | uses: actions/checkout@v3 50 | 51 | # Initializes the CodeQL tools for scanning. 52 | - name: Initialize CodeQL 53 | uses: github/codeql-action/init@v2 54 | with: 55 | languages: ${{ matrix.language }} 56 | # If you wish to specify custom queries, you can do so here or in a config file. 57 | # By default, queries listed here will override any specified in a config file. 58 | # Prefix the list here with "+" to use these queries and those in the config file. 59 | 60 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 61 | # queries: security-extended,security-and-quality 62 | 63 | 64 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). 65 | # If this step fails, then you should remove it and run the build manually (see below) 66 | - name: Autobuild 67 | uses: github/codeql-action/autobuild@v2 68 | 69 | # ℹ️ Command-line programs to run using the OS shell. 70 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 71 | 72 | # If the Autobuild fails above, remove it and uncomment the following three lines. 73 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 74 | 75 | # - run: | 76 | # echo "Run, Build Application using script" 77 | # ./location_of_script_within_repo/buildscript.sh 78 | 79 | - name: Perform CodeQL Analysis 80 | uses: github/codeql-action/analyze@v2 81 | with: 82 | category: "/language:${{matrix.language}}" 83 | -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Build the Docker image 18 | run: docker build . 
--file Dockerfile --tag my-image-name:$(date +%s) 19 | -------------------------------------------------------------------------------- /.github/workflows/python-package-conda.yml: -------------------------------------------------------------------------------- 1 | name: Python Package using Conda 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build-linux: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | max-parallel: 5 10 | 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python 3.10 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: '3.10' 17 | - name: Add conda to system path 18 | run: | 19 | # $CONDA is an environment variable pointing to the root of the miniconda directory 20 | echo $CONDA/bin >> $GITHUB_PATH 21 | - name: Install dependencies 22 | run: | 23 | conda env update --file environment.yml --name base 24 | - name: Lint with flake8 25 | run: | 26 | conda install flake8 27 | # stop the build if there are Python syntax errors or undefined names 28 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 29 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 30 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 31 | - name: Test with pytest 32 | run: | 33 | conda install pytest 34 | pytest 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 
95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | #ignore .bin model files and .sqlite files 163 | *.bin 164 | *.sqlite 165 | *.sqlite-shm 166 | *.sqlite-wal 167 | *.gguf 168 | generated_transcript_embeddings_zip_files 169 | old_logs 170 | *.csv 171 | models_url.json 172 | resource_monitoring_logs.json 173 | redis_configured.txt 174 | saved_outputs -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Swiss Army Llama", 6 | "type": "debugpy", 7 | "request": "launch", 8 | "program": "${workspaceFolder}/swiss_army_llama.py", 9 | "console": "integratedTerminal", 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "git.ignoreLimitWarning": true, 3 | "cSpell.words": [ 4 | "bfnrt" 5 | ] 6 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use Python 3.9 image 2 | FROM python:3.9-buster 3 | 4 | # Set environment variables 5 | ENV PYTHONUNBUFFERED=1 6 | 7 | # Set working directory 8 | WORKDIR /app 9 | 10 | # Install system dependencies, including Redis and sudo 11 | RUN apt-get update && apt-get install -y \ 12 | build-essential \ 13 | libpq-dev \ 14 | libmagic1 \ 15 | libxml2-dev \ 16 | libxslt1-dev \ 17 | antiword \ 18 | unrtf \ 19 | poppler-utils \ 20 | tesseract-ocr \ 21 | flac \ 22 | ffmpeg \ 23 | lame \ 24 | libmad0 \ 25 | libsox-fmt-mp3 \ 26 | sox \ 27 | libjpeg-dev\ 28 | swig \ 29 | curl \ 30 | redis-server \ 31 | sudo && \ 32 | rm -rf /var/lib/apt/lists/* 33 | 34 | # Install latest Rust and Cargo using rustup 35 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 36 | ENV PATH="/root/.cargo/bin:${PATH}" 37 | 38 | # Upgrade pip and install wheel 39 | RUN python3 -m pip install --upgrade pip && \ 40 | python3 -m pip install wheel 41 | 42 | # Copy the requirements file and install Python dependencies 43 | COPY requirements.txt . 44 | RUN pip install --no-cache-dir -r requirements.txt 45 | 46 | # Copy the .env file 47 | COPY .env . 48 | 49 | # Copy the rest of the application 50 | COPY . . 51 | 52 | # Expose the port the app runs on and Redis default port 53 | EXPOSE 8089 6379 54 | 55 | # Command to run Redis in the background and then the application 56 | CMD redis-server & python3 swiss_army_llama.py 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🇨🇭🎖️🦙 Swiss Army Llama 2 | 3 |
4 | ![Swiss Army Llama Logo](https://github.com/Dicklesworthstone/swiss_army_llama/raw/main/image_files/swiss_army_llama_logo.webp) 5 |
6 | 7 | ## Introduction 8 | 9 | The Swiss Army Llama is designed to facilitate and optimize the process of working with local LLMs. It uses FastAPI to expose convenient REST endpoints for various tasks, including obtaining text embeddings and completions from different LLMs via llama_cpp, and it automates the process of obtaining embeddings for most common document types, including PDFs (even ones that require OCR) and Word files. It even allows you to submit an audio file; the service automatically transcribes it with the Whisper model, cleans up the resulting text, and then computes the embeddings for it. To avoid wasting computation, these embeddings are cached in SQLite and retrieved if they have already been computed before. To speed up the process of loading multiple LLMs, optional RAM Disks can be used, and the process for creating and managing them is handled automatically for you. With a quick and easy setup process, you will immediately get access to a veritable "Swiss Army Knife" of LLM-related tools, all accessible via a convenient Swagger UI and ready to be integrated into your own applications with minimal fuss or configuration required. 10 | 11 | Some additional useful endpoints are provided, such as computing semantic similarity between submitted text strings. The service leverages a high-performance Rust-based library, `fast_vector_similarity`, to offer a range of similarity measures including `spearman_rho`, `kendall_tau`, `approximate_distance_correlation`, `jensen_shannon_dependency_measure`, and [`hoeffding_d`](https://blogs.sas.com/content/iml/2021/05/03/examples-hoeffding-d.html). Additionally, semantic search across all your cached embeddings is supported using FAISS vector searching. You can either use the built-in cosine similarity from FAISS, or supplement this with a second pass that computes the more sophisticated similarity measures for the most relevant subset of the stored vectors found using cosine similarity (see the advanced semantic search endpoint for this functionality). 12 | 13 | Also, we now support multiple embedding pooling methods for combining token-level embedding vectors into a single fixed-length embedding vector for any length of input text, including the following: 14 | - `mean`: Mean pooling of token embeddings. 15 | - `mins_maxes`: Concatenation of the minimum and maximum values of each dimension of the token embeddings. 16 | - `svd`: Concatenation of the first two singular vectors obtained from the Singular Value Decomposition (SVD) of the token embeddings matrix. 17 | - `svd_first_four`: Concatenation of the first four singular vectors obtained from the Singular Value Decomposition (SVD) of the token embeddings matrix. 18 | - `ica`: Flattened independent components obtained from Independent Component Analysis (ICA) of the token embeddings. 19 | - `factor_analysis`: Flattened factors obtained from Factor Analysis of the token embeddings. 20 | - `gaussian_random_projection`: Flattened embeddings obtained from Gaussian Random Projection of the token embeddings. 21 | 22 |
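To make these concrete, here is a small NumPy sketch of how three of the simpler pooling methods can be computed from a matrix of token embeddings; this illustrates the math rather than the service's exact implementation:

```python
import numpy as np

# One row per token, one column per embedding dimension; random stand-in data.
token_embeddings = np.random.rand(12, 8)  # shape: (num_tokens, embedding_dim)

# `mean`: average over tokens -> vector of length embedding_dim.
mean_pooled = token_embeddings.mean(axis=0)

# `mins_maxes`: per-dimension min and max, concatenated -> 2 * embedding_dim.
mins_maxes_pooled = np.concatenate(
    [token_embeddings.min(axis=0), token_embeddings.max(axis=0)]
)

# `svd`: concatenate the first two singular vectors -> 2 * embedding_dim.
# (Right-singular vectors are used here so the output length is independent
# of the number of tokens; the service's code may differ in such details.)
_, _, vt = np.linalg.svd(token_embeddings, full_matrices=False)
svd_pooled = np.concatenate([vt[0], vt[1]])

print(mean_pooled.shape, mins_maxes_pooled.shape, svd_pooled.shape)
```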
23 | As mentioned above, you can now submit not only plaintext and fully digital PDFs but also MS Word documents, images, and other file types supported by the textract library. The library can automatically apply OCR using Tesseract for scanned text. The returned embeddings for each sentence in a document can be organized in various formats like records, table, etc., using the Pandas to_json() function. The results can be returned either as a ZIP file containing a JSON file or as a direct JSON response. You can now also submit audio files in MP3 or WAV formats. The library uses OpenAI's Whisper model, as optimized by the Faster Whisper Python library, to transcribe the audio into text. Optionally, this transcript can be treated like any other document, with each sentence's embeddings computed and stored. The results are returned as a URL to a downloadable ZIP file containing a JSON with the embedding vector data. 24 | 25 | Finally, we add a new endpoint for generating multiple text completions for a given input prompt, with the ability to specify a grammar file that will enforce a particular form of response, such as JSON. There is also a useful new utility feature: a real-time application log viewer that can be accessed via a web browser, which allows for syntax highlighting and offers options for downloading the logs or copying them to the clipboard. This allows a user to watch the logs without having direct SSH access to the server. 26 | 27 | ## Screenshots 28 | ![Swiss Army Llama Swagger UI](https://github.com/Dicklesworthstone/swiss_army_llama/raw/main/image_files/swiss_army_llama__swagger_screenshot.png) 29 | ![Swiss Army Llama Running](https://github.com/Dicklesworthstone/swiss_army_llama/raw/main/image_files/swiss_army_llama__swagger_screenshot_running.png) 30 | 31 | *TLDR:* If you just want to try it very quickly on a fresh Ubuntu 22+ machine (warning, this will install Docker using apt): 32 | 33 | ```bash 34 | git clone https://github.com/Dicklesworthstone/swiss_army_llama 35 | cd swiss_army_llama 36 | chmod +x setup_dockerized_app_on_fresh_machine.sh 37 | sudo ./setup_dockerized_app_on_fresh_machine.sh 38 | ``` 39 | 40 | To run it natively (not using Docker) in a Python venv (recommended!), you can use these commands: 41 | 42 | ```bash 43 | sudo apt-get update 44 | sudo apt-get install build-essential libxml2-dev libxslt1-dev antiword unrtf poppler-utils pstotext tesseract-ocr flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig redis-server libpoppler-cpp-dev pkg-config -y 45 | sudo systemctl enable redis-server 46 | sudo systemctl start redis 47 | git clone https://github.com/Dicklesworthstone/swiss_army_llama 48 | cd swiss_army_llama 49 | python3 -m venv venv 50 | source venv/bin/activate 51 | python3 -m pip install --upgrade pip 52 | python3 -m pip install wheel 53 | python3 -m pip install --upgrade setuptools wheel 54 | pip install -r requirements.txt 55 | python3 swiss_army_llama.py 56 | ``` 57 | 58 | Alternatively, you can also just run the included script, which will install pyenv if it's not already installed on your machine, and then install Python 3.12 and create a virtual environment for you. You can do everything with a single one-liner from scratch on a fresh Ubuntu machine like this: 59 | 60 | ```bash 61 | git clone https://github.com/Dicklesworthstone/swiss_army_llama && cd swiss_army_llama && chmod +x install_swiss_army_llama.sh && ./install_swiss_army_llama.sh && pyenv local 3.12 && source venv/bin/activate && python swiss_army_llama.py 62 | ``` 63 | 64 | Then open a browser to `<your VPS IP>:8089` if you're using a VPS to get to the FastAPI Swagger page. 65 | 66 | Or to `http://localhost:8089` if you're using your own machine-- but, really, you should never run untrusted code with sudo on your own machine! Just get a cheap VPS to experiment with for $30/month.
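Once the server is running, you can sanity-check it from the command line before opening the Swagger page. Here is a minimal sketch using `curl` (the field names follow the `EmbeddingRequest` model described later in this README, and the model and pooling values are just the defaults from the included `.env`; consult the Swagger page for the exact schema of your build):

```bash
# Hypothetical smoke test against the default port from the .env file.
curl -X POST 'http://localhost:8089/get_embedding_vector_for_string/' \
  -H 'Content-Type: application/json' \
  -d '{"text": "This is a test sentence.", "llm_model_name": "nomic-embed-text-v1.5.Q6_K", "embedding_pooling_method": "mean"}'
```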
67 | 68 | Watch the automated setup process in action [here](https://asciinema.org/a/601603). 69 | 70 | --- 71 | 72 | ## Features 73 | 74 | 1. **Text Embedding Computation**: Utilizes pre-trained Llama 3 and other LLMs via llama_cpp to generate embeddings for any provided text. 75 | 2. **Embedding Caching**: Efficiently stores and retrieves computed embeddings in SQLite, minimizing redundant computations. 76 | 3. **Advanced Similarity Measurements and Retrieval**: Utilizes the author's own `fast_vector_similarity` library written in Rust to offer highly optimized advanced similarity measures such as `spearman_rho`, `kendall_tau`, `approximate_distance_correlation`, `jensen_shannon_dependency_measure`, and `hoeffding_d`. Semantic search across cached embeddings is also supported using FAISS vector searching. 77 | 4. **Two-Step Advanced Semantic Search**: The API first leverages FAISS and cosine similarity for rapid filtering, and then applies additional similarity measures like `spearman_rho`, `kendall_tau`, `approximate_distance_correlation`, `jensen_shannon_dependency_measure`, and `hoeffding_d` for a more nuanced comparison. 78 | 5. **File Processing for Documents**: The library now accepts a broader range of file types including plaintext, PDFs, MS Word documents, and images. It can also handle OCR automatically. Returned embeddings for each sentence are organized in various formats like records, table, etc., using the Pandas to_json() function. 79 | 6. **Advanced Text Preprocessing**: The library now employs a more advanced sentence splitter to segment text into meaningful sentences. It handles cases where periods are used in abbreviations, domain names, or numbers and also ensures complete sentences even when quotes are used. It also takes care of pagination issues commonly found in scanned documents, such as awkward newlines and hyphenated line breaks. 80 | 7. **Audio Transcription and Embedding**: Upload an audio file in MP3 or WAV format. The library uses OpenAI's Whisper model for transcription. Optionally, sentence embeddings can be computed for the transcript. 81 | 8. **RAM Disk Usage**: Optionally uses RAM Disk to store models for faster access and execution. Automatically handles the creation and management of RAM Disks. 82 | 9. **Robust Exception Handling**: Features comprehensive exception management to ensure system resilience. 83 | 10. **Interactive API Documentation**: Integrates with Swagger UI for an interactive and user-friendly experience, accommodating large result sets without crashing. 84 | 11. **Scalability and Concurrency**: Built on the FastAPI framework, handles concurrent requests and supports parallel inference with configurable concurrency levels. 85 | 12. **Flexible Configurations**: Offers configurable settings through environment variables and input parameters, including response formats like JSON or ZIP files. 86 | 13. **Comprehensive Logging**: Captures essential information with detailed logs, without overwhelming storage or readability. 87 | 14. **Support for Multiple Models and Measures**: Accommodates multiple embedding models and similarity measures, allowing flexibility and customization based on user needs. 88 | 15. **Ability to Generate Multiple Completions using Specified Grammar**: Get back structured LLM completions for a specified input prompt (a minimal grammar sketch follows this list). 89 | 16. **Real-Time Log File Viewer in Browser**: Lets anyone with access to the API server conveniently watch the application logs to gain insight into the execution of their requests. 90 | 17. **Uses Redis for Request Locking**: Uses Redis to allow for multiple Uvicorn workers to run in parallel without conflicting with each other.
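As a concrete illustration of feature 15 above, grammar files use llama.cpp's GBNF format. A minimal grammar that forces the model to answer with exactly one of two words might look like the following sketch (illustrative only; see the included `grammar_files/` directory, e.g. `accept_or_reject.gbnf` and `json.gbnf`, for the grammars that actually ship with the repo):

```gbnf
# Hypothetical two-word grammar: the completion must be exactly one of these.
root ::= "accept" | "reject"
```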
91 | 92 | ## Demo Screen Recording in Action 93 | [Here](https://asciinema.org/a/39dZ8vv9nkcNygasUl35wnBPq) is the live console output while I interact with it from the Swagger page to make requests. 94 | 95 | --- 96 | 97 | ## Requirements 98 | 99 | System requirements for running the application (to support all the file types handled by textract): 100 | 101 | ```bash 102 | sudo apt-get update 103 | sudo apt-get install libxml2-dev libxslt1-dev antiword unrtf poppler-utils pstotext tesseract-ocr flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig -y 104 | ``` 105 | 106 | Python Requirements: 107 | 108 | ```bash 109 | aioredis 110 | aioredlock 111 | aiosqlite 112 | apscheduler 113 | faiss-cpu 114 | fast_vector_similarity 115 | fastapi 116 | faster-whisper 117 | filelock 118 | httpx 119 | llama-cpp-python 120 | magika 121 | mutagen 122 | nvgpu 123 | pandas 124 | pillow 125 | psutil 126 | pydantic 127 | PyPDF2 128 | pytest 129 | python-decouple 130 | python-multipart 131 | pytz 132 | redis 133 | ruff 134 | scikit-learn 135 | scipy 136 | sqlalchemy 137 | textract-py3 138 | uvicorn 139 | uvloop 140 | zstandard 141 | ``` 142 | 143 | ## Running the Application 144 | 145 | You can run the application using the following command: 146 | 147 | ```bash 148 | python swiss_army_llama.py 149 | ``` 150 | 151 | The server will start on `0.0.0.0` at the port defined by the `SWISS_ARMY_LLAMA_SERVER_LISTEN_PORT` variable. 152 | 153 | Access the Swagger UI: 154 | 155 | ``` 156 | http://localhost:<SWISS_ARMY_LLAMA_SERVER_LISTEN_PORT> 157 | ``` 158 | 159 | ## Configuration 160 | 161 | You can configure the service easily by editing the included `.env` file. Here's a list of available configuration options: 162 | 163 | - `USE_SECURITY_TOKEN`: Whether to use a hardcoded security token. (e.g., `1`) 164 | - `USE_PARALLEL_INFERENCE_QUEUE`: Use parallel processing. (e.g., `1`) 165 | - `MAX_CONCURRENT_PARALLEL_INFERENCE_TASKS`: Maximum number of parallel inference tasks. (e.g., `30`) 166 | - `DEFAULT_MODEL_NAME`: Default model name to use. (e.g., `Llama-3-8B-Instruct-64k`) 167 | - `LLM_CONTEXT_SIZE_IN_TOKENS`: Context size in tokens for LLM. (e.g., `512`) 168 | - `SWISS_ARMY_LLAMA_SERVER_LISTEN_PORT`: Port number for the service. (e.g., `8089`) 169 | - `UVICORN_NUMBER_OF_WORKERS`: Number of workers for Uvicorn. (e.g., `2`) 170 | - `MINIMUM_STRING_LENGTH_FOR_DOCUMENT_EMBEDDING`: Minimum string length for document embedding. (e.g., `15`) 171 | - `MAX_RETRIES`: Maximum retries for locked database. (e.g., `10`) 172 | - `DB_WRITE_BATCH_SIZE`: Database write batch size. (e.g., `25`) 173 | - `RETRY_DELAY_BASE_SECONDS`: Retry delay base in seconds. (e.g., `1`) 174 | - `JITTER_FACTOR`: Jitter factor for retries. (e.g., `0.1`) 175 | - `USE_RAMDISK`: Use RAM disk. (e.g., `1`) 176 | - `RAMDISK_PATH`: Path to the RAM disk. (e.g., `"/mnt/ramdisk"`) 177 | - `RAMDISK_SIZE_IN_GB`: RAM disk size in GB. (e.g., `40`) 178 | 179 | ## Contributing 180 | 181 | If you'd like to contribute to the project, please submit a pull request! Seriously, I'd love to get some more community going so we can make this a standard library! 182 | 183 | ## License 184 | 185 | This project is licensed under the MIT License. 186 | 187 | ## Some Llama Knife Images I found on Google 188 |

189 | ![Llama Knife Sticker](https://github.com/Dicklesworthstone/swiss_army_llama/raw/main/image_files/llama_knife_sticker.webp) 190 | 191 | ![Llama Knife Sticker 2](https://github.com/Dicklesworthstone/swiss_army_llama/raw/main/image_files/llama_knife_sticker2.jpg)

192 | 193 | --- 194 | 195 | ## Setup and Configuration 196 | 197 | ### RAM Disk Configuration 198 | 199 | To enable password-less sudo for RAM Disk setup and teardown, edit the `sudoers` file with `sudo visudo`. Add the following lines, replacing `username` with your actual username: 200 | 201 | ```plaintext 202 | username ALL=(ALL) NOPASSWD: /bin/mount -t tmpfs -o size=*G tmpfs /mnt/ramdisk 203 | username ALL=(ALL) NOPASSWD: /bin/umount /mnt/ramdisk 204 | ``` 205 | 206 | The application provides functionalities to set up, clear, and manage RAM Disk. RAM Disk is used to store models in memory for faster access. It calculates the available RAM and sets up the RAM Disk accordingly. The functions `setup_ramdisk`, `copy_models_to_ramdisk`, and `clear_ramdisk` manage these tasks. 207 | 208 | ## API Endpoints 209 | 210 | The following endpoints are available: 211 | 212 | - **GET `/get_list_of_available_model_names/`**: Retrieve Available Model Names. Retrieves the list of available model names for generating embeddings. 213 | - **GET `/get_all_stored_strings/`**: Retrieve All Strings. Retrieves a list of all stored strings from the database for which embeddings have been computed. 214 | - **GET `/get_all_stored_documents/`**: Retrieve All Stored Documents. Retrieves a list of all stored documents from the database for which embeddings have been computed. 215 | - **GET `/show_logs/`**: Shows logs for the last 5 minutes by default. Can also provide a parameter like this: `/show_logs/{minutes}` to get the last N minutes of log data. 216 | - **POST `/add_new_model/`**: Add New Model by URL. Submit a new model URL for download and use. The model must be in `.gguf` format and larger than 100 MB to ensure it's a valid model file (you can directly paste in the Huggingface URL) 217 | - **POST `/get_embedding_vector_for_string/`**: Retrieve Embedding Vector for a Given Text String. Retrieves the embedding vector for a given input text string using the specified model. 218 | - **POST `/compute_similarity_between_strings/`**: Compute Similarity Between Two Strings. Leverages the `fast_vector_similarity` library to compute the similarity between two given input strings using specified model embeddings and a selected similarity measure. 219 | - **POST `/search_stored_embeddings_with_query_string_for_semantic_similarity/`**: Get Most Similar Strings from Stored Embeddings in Database. Find the most similar strings in the database to the given input "query" text. 220 | - **POST `/advanced_search_stored_embeddings_with_query_string_for_semantic_similarity/`**: Perform a two-step advanced semantic search. First uses FAISS and cosine similarity to narrow down the most similar strings, then applies additional similarity measures for refined comparison. 221 | - **POST `/get_all_embedding_vectors_for_document/`**: Get Embeddings for a Document. Extract text embeddings for a document. This endpoint supports plain text, .doc/.docx (MS Word), PDF files, images (using Tesseract OCR), and many other file types supported by the textract library. 222 | - **POST `/compute_transcript_with_whisper_from_audio/`**: Transcribe and Embed Audio using Whisper and LLM. This endpoint accepts an audio file and optionally computes document embeddings. The transcription and embeddings are stored, and a ZIP file containing the embeddings can be downloaded. 
223 | - **POST `/get_text_completions_from_input_prompt/`**: Get back multiple completions from the specified LLM model, with the ability to specify a grammar file which will enforce a particular format of the response, such as JSON. 224 | - **POST `/clear_ramdisk/`**: Clear Ramdisk Endpoint. Clears the RAM Disk if it is enabled. 225 | 226 | For detailed request and response schemas, please refer to the Swagger UI available at the root URL or the section at the end of this `README`. 227 | 228 | ## Exception Handling 229 | 230 | The application has robust exception handling to deal with various types of errors, including database errors and general exceptions. Custom exception handlers are defined for `SQLAlchemyError` and general `Exception`. 231 | 232 | ## Logging 233 | 234 | Logging is configured at the INFO level to provide detailed logs for debugging and monitoring. The logger provides information about the state of the application, errors, and activities. 235 | 236 | The logs are stored in a file named `swiss_army_llama.log`, and a log rotation mechanism is implemented to handle log file backups. The rotating file handler is configured with a maximum file size of 10 MB, and it keeps up to 5 backup files. 237 | 238 | When a log file reaches its maximum size, it is moved to the `old_logs` directory, and a new log file is created. The log entries are also printed to the standard output stream. 239 | 240 | Here are some details of the logging configuration: 241 | 242 | - Log Level: INFO 243 | - Log Format: `%(asctime)s - %(levelname)s - %(message)s` 244 | - Max Log File Size: 10 MB 245 | - Backup Count: 5 246 | - Old Logs Directory: `old_logs` 247 | 248 | Additionally, the log level for SQLAlchemy's engine is set to WARNING to suppress verbose database logs. 249 | 250 | ## Database Structure 251 | 252 | The application uses a SQLite database via SQLAlchemy ORM. 
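Since the whole store is a single SQLite file (`swiss_army_llama.sqlite`, per the `DATABASE_URL` in `database_functions.py`), you can also inspect the cached embeddings outside the application. Here is a minimal sketch (the table name used below is an assumption; check the actual `__tablename__` values in `embeddings_data_models.py`, while the column names follow the data models described next):

```python
import json
import sqlite3

# Hypothetical inspection script. The database filename comes from
# database_functions.py; the table name "text_embeddings" is a guess, so
# verify it against the __tablename__ in embeddings_data_models.py first.
conn = sqlite3.connect("swiss_army_llama.sqlite")
rows = conn.execute(
    "SELECT text, llm_model_name, embedding_pooling_method, embedding_json "
    "FROM text_embeddings LIMIT 3"
).fetchall()
for text, model, pooling, embedding_json in rows:
    vector = json.loads(embedding_json)  # stored as JSON; structure may vary
    print(f"{model} ({pooling}): {text[:50]!r} -> {len(vector)} values")
conn.close()
```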
Here are the data models used, which can be found in the `embeddings_data_models.py` file: 253 | 254 | ### TextEmbedding Table 255 | 256 | - `id`: Primary Key 257 | - `text`: Text for which the embedding was computed 258 | - `text_hash`: Hash of the text, computed using SHA3-256 259 | - `embedding_pooling_method`: The method used to pool the embeddings 260 | - `embedding_hash`: Hash of the computed embedding 261 | - `llm_model_name`: Model used to compute the embedding 262 | - `corpus_identifier_string`: An optional string identifier for grouping embeddings into a specific corpus 263 | - `embedding_json`: The computed embedding in JSON format 264 | - `ip_address`: Client IP address 265 | - `request_time`: Timestamp of the request 266 | - `response_time`: Timestamp of the response 267 | - `total_time`: Total time taken to process the request 268 | - `document_file_hash`: Foreign Key referencing the DocumentEmbedding table 269 | - `document`: Relationship with DocumentEmbedding 270 | 271 | ### DocumentEmbedding Table 272 | 273 | - `id`: Primary Key 274 | - `document_hash`: Foreign Key referencing the Documents table 275 | - `filename`: Name of the document file 276 | - `mimetype`: MIME type of the document file 277 | - `document_file_hash`: Hash of the file 278 | - `embedding_pooling_method`: The method used to pool the embeddings 279 | - `llm_model_name`: Model used to compute the embedding 280 | - `corpus_identifier_string`: An optional string identifier for grouping documents into a specific corpus 281 | - `file_data`: Binary data of the original file 282 | - `sentences`: The extracted sentences from the document 283 | - `document_embedding_results_json_compressed_binary`: The computed embedding results in JSON format compressed with Z-standard compression 284 | - `ip_address`: Client IP address 285 | - `request_time`: Timestamp of the request 286 | - `response_time`: Timestamp of the response 287 | - `total_time`: Total time taken to process the request 288 | - `embeddings`: Relationship with TextEmbedding 289 | - `document`: Relationship with Document 290 | 291 | ### Document Table 292 | 293 | - `id`: Primary Key 294 | - `llm_model_name`: Model name associated with the document 295 | - `corpus_identifier_string`: An optional string identifier for grouping documents into a specific corpus 296 | - `document_hash`: Computed Hash of the document 297 | - `document_embeddings`: Relationship with DocumentEmbedding 298 | 299 | ### AudioTranscript Table 300 | 301 | - `audio_file_hash`: Primary Key 302 | - `audio_file_name`: Name of the audio file 303 | - `audio_file_size_mb`: File size in MB 304 | - `segments_json`: Transcribed segments as JSON 305 | - `combined_transcript_text`: Combined transcript text 306 | - `combined_transcript_text_list_of_metadata_dicts`: List of metadata dictionaries for each segment of the combined transcript 307 | - `info_json`: Transcription info as JSON 308 | - `ip_address`: Client IP address 309 | - `request_time`: Timestamp of the request 310 | - `response_time`: Timestamp of the response 311 | - `total_time`: Total time taken to process the request 312 | - `corpus_identifier_string`: An optional string identifier for grouping transcripts into a specific corpus 313 | 314 | ### Database Relationships 315 | 316 | 1. **TextEmbedding - DocumentEmbedding**: 317 | - `TextEmbedding` has a Foreign Key `document_file_hash` that references `DocumentEmbedding`'s `document_file_hash`. 
318 | - This means multiple text embeddings can belong to a single document embedding, establishing a one-to-many relationship. 319 | 320 | 2. **DocumentEmbedding - Document**: 321 | - `DocumentEmbedding` has a Foreign Key `document_hash` that references `Document`'s `document_hash`. 322 | - This establishes a one-to-many relationship between `Document` and `DocumentEmbedding`. 323 | 324 | 3. **AudioTranscript**: 325 | - This table doesn't have a direct relationship with other tables based on the given code. 326 | 327 | 4. **Request/Response Models**: 328 | - These are not directly related to the database tables but are used for handling API requests and responses. 329 | - The following Pydantic models are used for request and response validation: 330 | - EmbeddingRequest 331 | - SimilarityRequest 332 | - SemanticSearchRequest 333 | - SemanticSearchResponse 334 | - AdvancedSemanticSearchRequest 335 | - AdvancedSemanticSearchResponse 336 | - EmbeddingResponse 337 | - SimilarityResponse 338 | - AllStringsResponse 339 | - AllDocumentsResponse 340 | - TextCompletionRequest 341 | - TextCompletionResponse 342 | - ImageQuestionResponse 343 | - AudioTranscriptResponse 344 | - ShowLogsIncrementalModel 345 | - AddGrammarRequest 346 | - AddGrammarResponse 347 | 348 | For detailed field descriptions and validations, please refer to the `embeddings_data_models.py` file. 349 | 350 | ## Performance Optimizations 351 | 352 | This section highlights the major performance enhancements integrated into the provided code to ensure swift responses and optimal resource management. 353 | 354 | ### 1. **Asynchronous Programming**: 355 | 356 | - **Benefit**: Handles multiple tasks concurrently, enhancing efficiency for I/O-bound operations like database transactions and network requests. 357 | - **Implementation**: Utilizes Python's `asyncio` library for asynchronous database operations. 358 | 359 | ### 2. **Database Optimizations**: 360 | 361 | - **Write-Ahead Logging (WAL) Mode**: Enables concurrent reads and writes, optimizing for applications with frequent write demands. 362 | - **Retry Logic with Exponential Backoff**: Manages locked databases by retrying operations with progressive waiting times. 363 | - **Batch Writes**: Aggregates write operations for more efficient database interactions. 364 | - **DB Write Queue**: Uses an asynchronous queue to serialize write operations, ensuring consistent and non-conflicting database writes. 365 | 366 | ### 3. **RAM Disk Utilization**: 367 | 368 | - **Benefit**: Speeds up I/O-bound tasks by prioritizing operations in RAM over disk. 369 | - **Implementation**: Detects and prioritizes a RAM disk (`/mnt/ramdisk`) if available, otherwise defaults to the standard file system. 370 | 371 | ### 4. **Model Caching**: 372 | 373 | - **Benefit**: Reduces overhead by keeping loaded models in memory for subsequent requests. 374 | - **Implementation**: Uses a global `model_cache` dictionary to store and retrieve models. 375 | 376 | ### 5. **Parallel Inference**: 377 | 378 | - **Benefit**: Enhances processing speed for multiple data units, like document sentences. 379 | - **Implementation**: Employs `asyncio.gather` for concurrent inferences, regulated by a semaphore (`MAX_CONCURRENT_PARALLEL_INFERENCE_TASKS`). 380 | 381 | ### 6. **Embedding Caching**: 382 | 383 | - **Benefit**: Once embeddings are computed for a particular text, they are stored in the database, eliminating the need for re-computation during subsequent requests. 
- **Implementation**: When a request is made to compute an embedding, the system first checks the database. If the embedding for the given text is found, it is returned immediately, ensuring faster response times. 385 | 386 | --- 387 | 388 | ### Dockerized Version 389 | 390 | A bash script is included in this repo, `setup_dockerized_app_on_fresh_machine.sh`, that will automatically do everything for you, including installing Docker with `apt install`. 391 | 392 | To use it, first make the script executable and then run it like this: 393 | 394 | ```bash 395 | chmod +x setup_dockerized_app_on_fresh_machine.sh 396 | sudo ./setup_dockerized_app_on_fresh_machine.sh 397 | ``` 398 | 399 | If you prefer a manual setup, then read the following instructions: 400 | 401 | #### Prerequisites 402 | 403 | Ensure that you have Docker installed on your system. If not, follow these steps to install Docker on Ubuntu: 404 | 405 | ```bash 406 | sudo apt-get update 407 | sudo apt-get install docker.io 408 | sudo systemctl start docker 409 | sudo docker --version 410 | sudo usermod -aG docker $USER 411 | ``` 412 | 413 | You may need to log out and log back in or restart your system to apply the new group permissions, or use sudo in the following steps to build and run the container. 414 | 415 | #### Setup and Running the Application 416 | 417 | 1. **Clone the Repository:** 418 | 419 | Clone the Swiss Army Llama repository to your local machine: 420 | 421 | ```bash 422 | git clone https://github.com/Dicklesworthstone/swiss_army_llama 423 | cd swiss_army_llama 424 | ``` 425 | 426 | 2. **Build the Docker Image:** 427 | 428 | Build the Docker image using the provided Dockerfile: 429 | 430 | ```bash 431 | sudo docker build -t llama-embeddings . 432 | ``` 433 | 434 | 3. **Run the Docker Container:** 435 | 436 | Run the Docker container, mapping the container's port 8089 to the host's port 8089: 437 | 438 | ```bash 439 | sudo docker run -p 8089:8089 llama-embeddings 440 | ``` 441 | 442 | 4. **Accessing the Application:** 443 | 444 | The FastAPI application will now be accessible at `http://localhost:8089` or at the static IP address of your VPS instance if you're running on one (you can get a 10-core, 30 GB RAM, 1 TB SSD VPS with a static IP running Ubuntu 22.04 at Contabo for around $30/month, which is the cheapest I've found so far). 445 | 446 | You can then interact with the API using tools like `curl` or by accessing the FastAPI documentation at `http://localhost:8089/docs`. 447 | 448 | 5. **Viewing Logs:** 449 | 450 | Logs from the application can be viewed directly in the terminal where you ran the `docker run` command. 451 | 452 | #### Stopping and Managing the Container 453 | 454 | - To stop the running container, press `Ctrl+C` in the terminal or find the container ID using `docker ps` and run `sudo docker stop <container_id>`. 455 | - To remove the built image, use `sudo docker rmi llama-embeddings`. 456 | 457 | --- 458 | 459 | ## Startup Procedures 460 | 461 | During startup, the application performs the following tasks: 462 | 463 | 1. **Database Initialization**: 464 | - The application initializes the SQLite database, setting up tables and executing important PRAGMAs to optimize performance. 465 | - Some of the important SQLite PRAGMAs include setting the database to use Write-Ahead Logging (WAL) mode, setting synchronous mode to NORMAL, increasing cache size to 1GB, setting the busy timeout to 2 seconds, and setting the WAL autocheckpoint to 100. 466 | 2.
**Initialize Database Writer**: 467 | - A dedicated database writer (`DatabaseWriter`) is initialized with its own asynchronous queue to handle the write operations. 468 | - A set of hashes is created representing the operations that are currently being processed or have already been processed. This avoids any duplicate operations in the queue. 469 | 3. **RAM Disk Setup**: 470 | - If the `USE_RAMDISK` variable is enabled and the user has the required permissions, the application sets up a RAM Disk. 471 | - The application checks if there's already a RAM Disk set up at the specified path; if not, it calculates the optimal size for the RAM Disk and sets it up. 472 | - If the RAM Disk is enabled but the user lacks the required permissions, the RAM Disk feature is disabled and the application proceeds without it. 473 | 4. **Model Downloads**: 474 | - The application downloads the required models. 475 | 5. **Model Loading**: 476 | - Each downloaded model is loaded into memory. If any model file is not found, an error log is recorded. 477 | 6. **Build FAISS Indexes**: 478 | - The application creates FAISS indexes for efficient similarity search using the embeddings from the database. 479 | - Associated texts are stored by model name for further use. 480 | 481 | Note: 482 | - If the RAM Disk feature is enabled but the user lacks the required permissions, the application will disable the RAM Disk feature and proceed without it. 483 | - For any database operations, if the database is locked, the application will attempt to retry the operation a few times with exponential backoff and jitter. 484 | 485 | --- 486 | 487 | ## Endpoint Functionality and Workflow Overview 488 | 489 | Here's a detailed breakdown of the main endpoints provided by the FastAPI server, explaining their functionality, input parameters, and how they interact with underlying models and systems: 490 | 491 | ### 1. `/get_embedding_vector_for_string/` (POST) 492 | 493 | #### Purpose 494 | Retrieve the embedding vector for a given input text string using the specified model. 495 | 496 | #### Parameters 497 | - `text`: The input text for which the embedding vector is to be retrieved. 498 | - `model_name`: The model used to calculate the embedding (optional, will use the default model if not provided). 499 | - `token`: Security token (optional). 500 | - `client_ip`: Client IP address (optional). 501 | 502 | #### Workflow 503 | 1. **Retrieve Embedding**: The function retrieves or computes the embedding vector for the provided text using the specified or default model. 504 | 2. **Return Result**: The embedding vector for the input text string is returned in the response. 505 | 506 | ### 2. `/compute_similarity_between_strings/` (POST) 507 | 508 | #### Purpose 509 | Compute the similarity between two given input strings using specified model embeddings and a selected similarity measure. 510 | 511 | #### Parameters 512 | - `text1`: The first input text. 513 | - `text2`: The second input text. 514 | - `llm_model_name`: The model used to calculate embeddings (optional). 515 | - `similarity_measure`: The similarity measure to be used. Supported measures include `all`, `spearman_rho`, `kendall_tau`, `approximate_distance_correlation`, `jensen_shannon_dependency_measure`, and `hoeffding_d` (optional, default is `all`). 516 | 517 | #### Workflow 518 | 1. **Retrieve Embeddings**: The embeddings for `text1` and `text2` are retrieved or computed using the specified or default model. 519 | 2.
**Compute Similarity**: The similarity between the two embeddings is calculated using the specified similarity measure. 520 | 3. **Return Result**: The similarity score, along with the embeddings and input texts, is returned in the response. 521 | 522 | ### 3. `/search_stored_embeddings_with_query_string_for_semantic_similarity/` (POST) 523 | 524 | #### Purpose 525 | Find the most similar strings in the database to the given input "query" text. This endpoint uses a pre-computed FAISS index to quickly search for the closest matching strings. 526 | 527 | #### Parameters 528 | - `query_text`: The input text for which to find the most similar string. 529 | - `model_name`: The model used to calculate embeddings. 530 | - `number_of_most_similar_strings_to_return`: (Optional) The number of most similar strings to return, defaults to 10. 531 | - `token`: Security token (optional). 532 | 533 | #### Workflow 534 | 1. **Search FAISS Index**: The FAISS index, built on stored embeddings, is searched to find the most similar embeddings to the `query_text`. 535 | 2. **Return Result**: The most similar strings found in the database, along with the similarity scores, are returned in the response. 536 | 537 | ### 4. `/advanced_search_stored_embeddings_with_query_string_for_semantic_similarity/` (POST) 538 | 539 | #### Purpose 540 | Performs a two-step advanced semantic search. Utilizes FAISS and cosine similarity for initial filtering, followed by additional similarity measures for refined comparisons. 541 | 542 | #### Parameters 543 | - `query_text`: The input text for which to find the most similar strings. 544 | - `llm_model_name`: The model used to calculate embeddings. 545 | - `similarity_filter_percentage`: (Optional) Percentage of embeddings to filter based on cosine similarity; defaults to 0.02 (i.e., top 2%). 546 | - `number_of_most_similar_strings_to_return`: (Optional) Number of most similar strings to return after second similarity measure; defaults to 10. 547 | 548 | #### Workflow 549 | 1. **Initial Filtering**: Use FAISS and cosine similarity to find a set of similar strings. 550 | 2. **Refined Comparison**: Apply additional similarity measures to the filtered set. 551 | 3. **Return Result**: Return the most similar strings along with their multiple similarity scores. 552 | 553 | #### Example Request 554 | ```json 555 | { 556 | "query_text": "Find me the most similar string!", 557 | "llm_model_name": "openchat_v3.2_super", 558 | "similarity_filter_percentage": 0.02, 559 | "number_of_most_similar_strings_to_return": 5 560 | } 561 | ``` 562 | 563 | ### 5. `/get_all_embedding_vectors_for_document/` (POST) 564 | 565 | #### Purpose 566 | Extract text embeddings for a document. The library now supports a wide range of file types including plain text, .doc/.docx, PDF files, images (using Tesseract OCR), and many other types supported by the `textract` library. 567 | 568 | #### Parameters 569 | - `file`: The uploaded document file (either plain text, .doc/.docx, PDF, etc.). 570 | - `llm_model_name`: (Optional) The model used to calculate embeddings. 571 | - `json_format`: (Optional) The format of the JSON response. 572 | - `send_back_json_or_zip_file`: Whether to return a JSON file or a ZIP file containing the embeddings file (optional, defaults to `zip`). 573 | - `token`: Security token (optional). 574 | 575 | ### 6. `/compute_transcript_with_whisper_from_audio/` (POST) 576 | 577 | #### Purpose 578 | Transcribe an audio file and optionally compute document embeddings for the resulting transcript. 
This endpoint uses the Whisper model for transcription and a language model for generating embeddings. The transcription and embeddings can then be stored, and a ZIP file containing the embeddings can be made available for download. 579 | 580 | #### Parameters 581 | - `file`: The audio file that you need to upload for transcription. 582 | - `compute_embeddings_for_resulting_transcript_document`: Boolean to indicate whether document embeddings should be computed (optional, defaults to False). 583 | - `llm_model_name`: The language model used for computing embeddings (optional, defaults to the default model name). 584 | - `req`: HTTP request object for additional request metadata (optional). 585 | - `token`: Security token (optional). 586 | - `client_ip`: Client IP address (optional). 587 | 588 | #### Request File and Parameters 589 | You will need to use a multipart/form-data request to upload the audio file. The additional parameters like `compute_embeddings_for_resulting_transcript_document` and `llm_model_name` can be sent along as form fields. 590 | 591 | #### Example Request 592 | ```bash 593 | curl -X 'POST' \ 594 | 'http://localhost:8089/compute_transcript_with_whisper_from_audio/' \ 595 | -H 'accept: application/json' \ 596 | -H 'Authorization: Bearer YOUR_ACCESS_TOKEN' \ 597 | -F 'file=@your_audio_file.wav' \ 598 | -F 'compute_embeddings_for_resulting_transcript_document=true' \ 599 | -F 'llm_model_name=custom-llm-model' 600 | ``` 601 | 602 | ### 7. `/get_text_completions_from_input_prompt/` (POST) 603 | 604 | #### Purpose 605 | Generate text completions for a given input prompt using the specified model. 606 | 607 | #### Parameters 608 | - `request`: A JSON object containing various options like `input_prompt`, `llm_model_name`, etc. 609 | - `token`: Security token (optional). 610 | - `req`: HTTP request object (optional). 611 | - `client_ip`: Client IP address (optional). 612 | 613 | #### Request JSON Format 614 | The JSON object should have the following keys: 615 | - `input_prompt` 616 | - `llm_model_name` 617 | - `temperature` 618 | - `grammar_file_string` 619 | - `number_of_completions_to_generate` 620 | - `number_of_tokens_to_generate` 621 | 622 | #### Example Request 623 | ```json 624 | { 625 | "input_prompt": "The Kings of France in the 17th Century:", 626 | "llm_model_name": "phind-codellama-34b-python-v1", 627 | "temperature": 0.95, 628 | "grammar_file_string": "json", 629 | "number_of_tokens_to_generate": 500, 630 | "number_of_completions_to_generate": 3 631 | } 632 | ``` 633 | 634 | ### 8. `/get_list_of_available_model_names/` (GET) 635 | 636 | #### Purpose 637 | Retrieve the list of available model names for generating embeddings. 638 | 639 | #### Parameters 640 | - `token`: Security token (optional). 641 | 642 | ### 9. `/get_all_stored_strings/` (GET) 643 | 644 | #### Purpose 645 | Retrieve a list of all stored strings from the database for which embeddings have been computed. 646 | 647 | #### Parameters 648 | - `token`: Security token (optional). 649 | 650 | ### 10. `/get_all_stored_documents/` (GET) 651 | 652 | #### Purpose 653 | Retrieve a list of all stored documents from the database for which embeddings have been computed. 654 | 655 | #### Parameters 656 | - `token`: Security token (optional). 657 | 658 | ### 11. `/clear_ramdisk/` (POST) 659 | 660 | #### Purpose 661 | Clear the RAM Disk to free up memory. 662 | 663 | #### Parameters 664 | - `token`: Security token (optional). 665 | 666 | ### 12.
`/download/{file_name}` (GET) 667 | 668 | #### Purpose 669 | Download a ZIP file containing document embeddings that were generated through the `/compute_transcript_with_whisper_from_audio/` endpoint. The URL for this download will be supplied in the JSON response of the audio file transcription endpoint. 670 | 671 | #### Parameters 672 | - `file_name`: The name of the ZIP file that you want to download. 673 | 674 | ### 13. `/add_new_model/` (POST) 675 | 676 | #### Purpose 677 | Submit a new model URL for download and use. The model must be in `.gguf` format and larger than 100 MB to ensure it's a valid model file. 678 | 679 | #### Parameters 680 | - `model_url`: The URL of the model weight file, which must end with `.gguf`. 681 | - `token`: Security token (optional). 682 | 683 | 684 | ### Token-Level Embedding Vector Pooling 685 | 686 | Pooling methods are designed to aggregate token-level embeddings, which are typically variable in length due to differing numbers of tokens in sentences or documents. By converting these token-level embeddings into a single, fixed-length vector, we ensure that each input text is represented consistently, regardless of its length. This fixed-length vector can then be used in various machine learning models that require inputs of a consistent size. 687 | 688 | The primary goal of these pooling methods is to retain as much useful information as possible from the original token-level embeddings while ensuring that the transformation is deterministic and does not distort the data. Each method achieves this by applying different statistical or mathematical techniques to summarize the token embeddings. 689 | 690 | #### Explanation of Pooling Methods 691 | 692 | 1. **SVD (Singular Value Decomposition)**: 693 | - **How it works**: Concatenates the first two singular vectors obtained from the SVD of the token embeddings matrix. 694 | - **Rationale**: SVD is a dimensionality reduction technique that captures the most important features of the data. Using the first two singular vectors provides a compact representation that retains significant information. 695 | 696 | 2. **SVD_First_Four**: 697 | - **How it works**: Uses the first four singular vectors obtained from the SVD of the token embeddings matrix. 698 | - **Rationale**: By using more singular vectors, this method captures more of the variance in the data, providing a richer representation while still reducing dimensionality. 699 | 700 | 3. **ICA (Independent Component Analysis)**: 701 | - **How it works**: Applies ICA to the embeddings matrix to find statistically independent components, then flattens the result. 702 | - **Rationale**: ICA is useful for identifying independent sources in the data, providing a representation that highlights these independent features. 703 | 704 | 4. **Factor_Analysis**: 705 | - **How it works**: Applies factor analysis to the embeddings matrix to identify underlying factors, then flattens the result. 706 | - **Rationale**: Factor analysis models the data in terms of latent factors, providing a summary that captures these underlying influences. 707 | 708 | 5. **Gaussian_Random_Projection**: 709 | - **How it works**: Applies Gaussian random projection to reduce the dimensionality of the embeddings, then flattens the result. 710 | - **Rationale**: This method provides a fast and efficient way to reduce dimensionality while preserving the pairwise distances between points, useful for large datasets. 711 | 712 | --- 713 | 714 | Thanks for your interest in my open-source project! 
712 | ---
713 | 
714 | Thanks for your interest in my open-source project! I hope you find it useful. You might also find my commercial web apps useful, and I would really appreciate it if you checked them out:
715 | 
716 | **[YoutubeTranscriptOptimizer.com](https://youtubetranscriptoptimizer.com)** makes it quick and easy to paste in a YouTube video URL and automatically generate not just a highly accurate direct transcription, but also a polished, beautifully formatted written document that can be used independently of the video.
717 | 
718 | The document sticks to the same material discussed in the video, but it reads much more like a real piece of writing than a transcript. You can also optionally generate quizzes from the document's contents, either multiple-choice or short-answer; the multiple-choice quizzes are turned into interactive HTML files that can be hosted and easily shared, so you can actually take the quiz and have your answers graded and the quiz scored for you.
719 | 
720 | **[FixMyDocuments.com](https://fixmydocuments.com/)** lets you submit any kind of document (PDFs, including scanned PDFs that require OCR; MS Word and PowerPoint files; images; audio files such as mp3 and m4a) and turn it into a highly optimized version in nice markdown formatting, from which HTML and PDF versions are automatically generated. Once converted, you can also edit the document directly on the site using the built-in markdown editor, which saves a running revision history and regenerates the PDF/HTML versions.
721 | 
722 | In addition to getting the optimized version of the document, you can also generate many other kinds of "derived documents" from the original: interactive multiple-choice quizzes that you can actually take and get graded on; slick-looking presentation slides as PDF or HTML (using LaTeX and Reveal.js); an in-depth summary; a concept mind map (using Mermaid diagrams) and outline; custom lesson plans where you can select your target audience; a readability analysis and grade-level versions of your original document (good for simplifying concepts for students); Anki flashcards that you can import directly into the Anki app or use on the site in a nice interface; and more.
723 | -------------------------------------------------------------------------------- /add_mount.sh: -------------------------------------------------------------------------------- 1 | sudo mkfs.ext4 /dev/nvme1n1 2 | sudo mount /dev/nvme1n1 /mnt/models 3 | sudo chown -R ubuntu:ubuntu /mnt/models 4 | -------------------------------------------------------------------------------- /database_functions.py: -------------------------------------------------------------------------------- 1 | from embeddings_data_models import Base, TextEmbedding, DocumentEmbedding, Document, AudioTranscript 2 | from logger_config import setup_logger 3 | import traceback 4 | import asyncio 5 | import random 6 | from sqlalchemy import select, update, UniqueConstraint, exists 7 | from sqlalchemy import text as sql_text 8 | from sqlalchemy.exc import SQLAlchemyError, OperationalError, IntegrityError 9 | from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession 10 | from sqlalchemy.orm import sessionmaker 11 | from decouple import config 12 | from datetime import datetime, timedelta 13 | 14 | logger = setup_logger() 15 | db_writer = None 16 | DATABASE_URL = "sqlite+aiosqlite:///swiss_army_llama.sqlite" 17 | MAX_RETRIES = config("MAX_RETRIES", default=3, cast=int) 18 | DB_WRITE_BATCH_SIZE = config("DB_WRITE_BATCH_SIZE", default=25, cast=int) 19 | RETRY_DELAY_BASE_SECONDS = config("RETRY_DELAY_BASE_SECONDS", default=1, cast=int) 20 | JITTER_FACTOR = config("JITTER_FACTOR", default=0.1, cast=float) 21 | TIME_IN_DAYS_BEFORE_RECORDS_ARE_PURGED = config("TIME_IN_DAYS_BEFORE_RECORDS_ARE_PURGED", default=2, cast=int) 22 | 23 | engine = create_async_engine(DATABASE_URL, echo=False, connect_args={"check_same_thread": False}) 24 | AsyncSessionLocal = sessionmaker(bind=engine, class_=AsyncSession, expire_on_commit=False, autoflush=False) 25 | 26 | async def consolidate_wal_data(): 27 | consolidate_command = "PRAGMA wal_checkpoint(FULL);" 28 | try: 29 | async with engine.begin() as conn: 30 | result = await conn.execute(sql_text(consolidate_command)) 31 | result_fetch = result.fetchone() 32 | return result_fetch 33 | except Exception as e: 34 | logger.error(f"Error during WAL consolidation: {e}") 35 | return None 36 | 37 | class DatabaseWriter: 38 | def __init__(self, queue): 39 | self.queue = queue 40 | self.processing_hashes = set() 41 | 42 | def _get_hash_from_operation(self, operation): 43 | if isinstance(operation, TextEmbedding): 44 | return f"{operation.embedding_hash}" 45 | elif isinstance(operation, DocumentEmbedding): 46 | return f"{operation.document_embedding_results_json_compressed_binary}" 47 | elif isinstance(operation, Document): 48 | return operation.document_hash 49 | elif isinstance(operation, AudioTranscript): 50 | return operation.audio_file_hash 51 | return None 52 | 53 | async def initialize_processing_hashes(self, chunk_size=1000): 54 | start_time = datetime.utcnow() 55 | async with AsyncSessionLocal() as session: 56 | queries = [ 57 | (select(TextEmbedding.embedding_hash), TextEmbedding), 58 | (select(DocumentEmbedding.document_embedding_results_json_compressed_binary), DocumentEmbedding), 59 | (select(Document.document_hash), Document), 60 | (select(AudioTranscript.audio_file_hash), AudioTranscript) 61 | ] 62 | for query, model_class in queries: 63 | offset = 0 64 | while True: 65 | result = await session.execute(query.limit(chunk_size).offset(offset)) 66 | rows = result.fetchall() 67 | if not rows: 68 | break 69 | for row in rows: 70 | if model_class == TextEmbedding: 71 | hash_with_model = 
row[0] 72 | elif model_class == DocumentEmbedding: 73 | hash_with_model = row[0] 74 | elif model_class == Document: 75 | hash_with_model = row[0] 76 | elif model_class == AudioTranscript: 77 | hash_with_model = row[0] 78 | self.processing_hashes.add(hash_with_model) 79 | offset += chunk_size 80 | end_time = datetime.utcnow() 81 | total_time = (end_time - start_time).total_seconds() 82 | if len(self.processing_hashes) > 0: 83 | logger.info(f"Finished initializing set of input hash/llm_model_name combinations that are either currently being processed or have already been processed. Set size: {len(self.processing_hashes)}; Took {total_time} seconds, for an average of {total_time / len(self.processing_hashes)} seconds per hash.") 84 | 85 | async def _record_exists(self, session, operation): 86 | model_class = type(operation) 87 | if model_class == TextEmbedding: 88 | return await session.execute(select(exists().where(TextEmbedding.embedding_hash == operation.embedding_hash))) 89 | elif model_class == DocumentEmbedding: 90 | return await session.execute(select(exists().where(DocumentEmbedding.document_embedding_results_json_compressed_binary == operation.document_embedding_results_json_compressed_binary))) 91 | elif model_class == Document: 92 | return await session.execute(select(exists().where(Document.document_hash == operation.document_hash))) 93 | elif model_class == AudioTranscript: 94 | return await session.execute(select(exists().where(AudioTranscript.audio_file_hash == operation.audio_file_hash))) 95 | return None 96 | 97 | async def dedicated_db_writer(self): 98 | while True: 99 | write_operations_batch = await self.queue.get() 100 | async with AsyncSessionLocal() as session: 101 | filtered_operations = [] 102 | try: 103 | if write_operations_batch: 104 | for write_operation in write_operations_batch: 105 | existing_record = await self._record_exists(session, write_operation) 106 | if not existing_record.scalar(): 107 | filtered_operations.append(write_operation) 108 | hash_value = self._get_hash_from_operation(write_operation) 109 | if hash_value: 110 | self.processing_hashes.add(hash_value) 111 | else: 112 | await self._update_existing_record(session, write_operation) 113 | if filtered_operations: 114 | await consolidate_wal_data() # Consolidate WAL before performing writes 115 | session.add_all(filtered_operations) 116 | await session.flush() # Flush to get the IDs 117 | await session.commit() 118 | for write_operation in filtered_operations: 119 | hash_to_remove = self._get_hash_from_operation(write_operation) 120 | if hash_to_remove is not None and hash_to_remove in self.processing_hashes: 121 | self.processing_hashes.remove(hash_to_remove) 122 | except IntegrityError as e: 123 | await self._handle_integrity_error(e, write_operation, session) 124 | except SQLAlchemyError as e: 125 | logger.error(f"Database error: {e}") 126 | await session.rollback() 127 | except Exception as e: 128 | tb = traceback.format_exc() 129 | logger.error(f"Unexpected error: {e}\n{tb}") 130 | await session.rollback() 131 | self.queue.task_done() 132 | 133 | async def _update_existing_record(self, session, operation): 134 | model_class = type(operation) 135 | primary_keys = [key.name for key in model_class.__table__.primary_key] 136 | unique_constraints = [c for c in model_class.__table__.constraints if isinstance(c, UniqueConstraint)] 137 | conditions = [] 138 | for constraint in unique_constraints: 139 | if set(constraint.columns.keys()).issubset(set(operation.__dict__.keys())): 140 | for col in 
constraint.columns.keys(): 141 | conditions.append(getattr(model_class, col) == getattr(operation, col)) 142 | break 143 | if not conditions: 144 | for pk in primary_keys: 145 | conditions.append(getattr(model_class, pk) == getattr(operation, pk)) 146 | values = {col: getattr(operation, col) for col in operation.__dict__.keys() if col in model_class.__table__.columns.keys()} 147 | stmt = update(model_class).where(*conditions).values(**values) 148 | await session.execute(stmt) 149 | await session.commit() 150 | 151 | async def _handle_integrity_error(self, e, write_operation, session): 152 | unique_constraint_msg = { 153 | TextEmbedding: "embeddings.embedding_hash", 154 | DocumentEmbedding: "document_embeddings.document_embedding_results_json_compressed_binary", 155 | Document: "documents.document_hash", 156 | AudioTranscript: "audio_transcripts.audio_file_hash" 157 | }.get(type(write_operation)) 158 | if unique_constraint_msg and unique_constraint_msg in str(e): 159 | logger.warning(f"Embedding already exists in the database for given input: {e}") 160 | await self._update_existing_record(session, write_operation) 161 | else: 162 | raise 163 | 164 | async def enqueue_write(self, write_operations): 165 | write_operations = [op for op in write_operations if self._get_hash_from_operation(op) not in self.processing_hashes] 166 | if not write_operations: 167 | return 168 | for op in write_operations: 169 | hash_value = self._get_hash_from_operation(op) 170 | if hash_value: 171 | self.processing_hashes.add(hash_value) 172 | await self.queue.put(write_operations) 173 | 174 | 175 | async def execute_with_retry(func, *args, **kwargs): 176 | retries = 0 177 | while retries < MAX_RETRIES: 178 | try: 179 | return await func(*args, **kwargs) 180 | except OperationalError as e: 181 | if 'database is locked' in str(e): 182 | retries += 1 183 | sleep_time = RETRY_DELAY_BASE_SECONDS * (2 ** retries) + (random.random() * JITTER_FACTOR) # Implementing exponential backoff with jitter 184 | logger.warning(f"Database is locked. Retrying ({retries}/{MAX_RETRIES})... 
Waiting for {sleep_time} seconds")
185 |                 await asyncio.sleep(sleep_time)
186 |             else:
187 |                 raise
188 |     raise OperationalError("Database is locked after multiple retries", None, None)  # DBAPIError subclasses expect (statement, params, orig)
189 | 
190 | async def initialize_db(use_verbose=0):
191 |     logger.info("Initializing database, creating tables, and setting SQLite PRAGMAs...")
192 |     list_of_sqlite_pragma_strings = [
193 |         "PRAGMA journal_mode=WAL;",
194 |         "PRAGMA synchronous = NORMAL;",
195 |         "PRAGMA cache_size = -1048576;",
196 |         "PRAGMA busy_timeout = 2000;",
197 |         "PRAGMA wal_autocheckpoint = 100;"
198 |     ]
199 |     list_of_sqlite_pragma_justification_strings = [
200 |         "Set SQLite to use Write-Ahead Logging (WAL) mode (from default DELETE mode) so that reads and writes can occur simultaneously",
201 |         "Set synchronous mode to NORMAL (from FULL) so that writes are not blocked by reads",
202 |         "Set cache size to 1GB (from default 2MB) so that more data can be cached in memory and not read from disk; to make this 256MB, set it to -262144 instead",
203 |         "Increase the busy timeout to 2 seconds so that the database waits briefly for locks to clear instead of failing immediately",
204 |         "Set the WAL autocheckpoint to 100 (from default 1000) so that the WAL file is checkpointed more frequently"
205 |     ]
206 |     assert len(list_of_sqlite_pragma_strings) == len(list_of_sqlite_pragma_justification_strings)
207 |     async with engine.begin() as conn:
208 |         for pragma_string in list_of_sqlite_pragma_strings:
209 |             await conn.execute(sql_text(pragma_string))
210 |             if use_verbose:
211 |                 logger.info(f"Executed SQLite PRAGMA: {pragma_string}")
212 |                 logger.info(f"Justification: {list_of_sqlite_pragma_justification_strings[list_of_sqlite_pragma_strings.index(pragma_string)]}")
213 |         try:
214 |             await conn.run_sync(Base.metadata.create_all)  # Create tables if they don't exist
215 |         except Exception as e:  # noqa: F841
216 |             pass
217 |     logger.info("Database initialization completed.")
218 | 
219 | def get_db_writer() -> DatabaseWriter:
220 |     return db_writer  # Return the existing DatabaseWriter instance
221 | 
222 | def delete_expired_rows(session_factory):
223 |     async def async_delete_expired_rows():
224 |         async with session_factory() as session:
225 |             expiration_time = datetime.utcnow() - timedelta(days=TIME_IN_DAYS_BEFORE_RECORDS_ARE_PURGED)
226 |             models = [TextEmbedding, DocumentEmbedding, Document, AudioTranscript]
227 |             for model in models:
228 |                 expired_rows = await session.execute(
229 |                     select(model).where(model.created_at < expiration_time)
230 |                 )
231 |                 expired_rows = expired_rows.scalars().all()
232 |                 for row in expired_rows:
233 |                     await session.delete(row)
234 |             await session.commit()
235 |     return async_delete_expired_rows
236 | 
--------------------------------------------------------------------------------
/embeddings_data_models.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, Float, DateTime, Integer, UniqueConstraint, ForeignKey, LargeBinary
 2 | from sqlalchemy.dialects.sqlite import JSON
 3 | from sqlalchemy.orm import declarative_base, relationship
 4 | from sqlalchemy.ext.declarative import declared_attr
 5 | from hashlib import sha3_256
 6 | from pydantic import BaseModel, field_validator
 7 | from typing import List, Optional, Union, Dict
 8 | from decouple import config
 9 | from sqlalchemy import event
10 | from datetime import datetime
11 | 
12 | Base = declarative_base()
13 | DEFAULT_MODEL_NAME = config("DEFAULT_MODEL_NAME", default="Meta-Llama-3-8B-Instruct.Q3_K_S", cast=str)
14 | DEFAULT_EMBEDDING_MODEL_NAME =
config("DEFAULT_EMBEDDING_MODEL_NAME", default="nomic-embed-text-v1.5.Q6_K", cast=str) 15 | DEFAULT_MULTI_MODAL_MODEL_NAME = config("DEFAULT_MULTI_MODAL_MODEL_NAME", default="llava-llama-3-8b-v1_1-int4", cast=str) 16 | DEFAULT_MAX_COMPLETION_TOKENS = config("DEFAULT_MAX_COMPLETION_TOKENS", default=100, cast=int) 17 | DEFAULT_NUMBER_OF_COMPLETIONS_TO_GENERATE = config("DEFAULT_NUMBER_OF_COMPLETIONS_TO_GENERATE", default=4, cast=int) 18 | DEFAULT_COMPLETION_TEMPERATURE = config("DEFAULT_COMPLETION_TEMPERATURE", default=0.7, cast=float) 19 | DEFAULT_EMBEDDING_POOLING_METHOD = config("DEFAULT_EMBEDDING_POOLING_METHOD", default="mean", cast=str) 20 | 21 | class SerializerMixin: 22 | @declared_attr 23 | def __tablename__(cls): 24 | return cls.__name__.lower() 25 | 26 | def as_dict(self): 27 | return {c.key: getattr(self, c.key) for c in self.__table__.columns} 28 | 29 | class TextEmbedding(Base, SerializerMixin): 30 | __tablename__ = "embeddings" 31 | id = Column(Integer, primary_key=True, index=True) 32 | text = Column(String, index=True) 33 | text_hash = Column(String, index=True) 34 | embedding_pooling_method = Column(String, index=True) 35 | embedding_hash = Column(String, index=True) 36 | llm_model_name = Column(String, index=True) 37 | corpus_identifier_string = Column(String, index=True) 38 | embedding_json = Column(String) 39 | ip_address = Column(String) 40 | request_time = Column(DateTime) 41 | response_time = Column(DateTime) 42 | total_time = Column(Float) 43 | document_file_hash = Column(String, ForeignKey('document_embeddings.document_file_hash')) 44 | document = relationship("DocumentEmbedding", back_populates="embeddings", foreign_keys=[document_file_hash, corpus_identifier_string]) 45 | __table_args__ = (UniqueConstraint('embedding_hash', name='_embedding_hash_uc'),) 46 | 47 | class DocumentEmbedding(Base): 48 | __tablename__ = "document_embeddings" 49 | id = Column(Integer, primary_key=True, index=True) 50 | document_hash = Column(String, ForeignKey('documents.document_hash')) 51 | filename = Column(String) 52 | mimetype = Column(String) 53 | document_file_hash = Column(String, index=True) 54 | embedding_pooling_method = Column(String, index=True) 55 | llm_model_name = Column(String, index=True) 56 | corpus_identifier_string = Column(String, index=True) 57 | file_data = Column(LargeBinary) # To store the original file 58 | sentences = Column(String) 59 | document_embedding_results_json_compressed_binary = Column(LargeBinary) # To store the embedding results JSON 60 | ip_address = Column(String) 61 | request_time = Column(DateTime) 62 | response_time = Column(DateTime) 63 | total_time = Column(Float) 64 | embeddings = relationship("TextEmbedding", back_populates="document", foreign_keys=[TextEmbedding.document_file_hash]) 65 | __table_args__ = (UniqueConstraint('document_embedding_results_json_compressed_binary', name='_document_embedding_results_json_compressed_binary_uc'),) 66 | document = relationship("Document", back_populates="document_embeddings", foreign_keys=[document_hash]) 67 | 68 | class Document(Base): 69 | __tablename__ = "documents" 70 | id = Column(Integer, primary_key=True, index=True) 71 | llm_model_name = Column(String, index=True) 72 | corpus_identifier_string = Column(String, index=True) 73 | document_hash = Column(String, index=True) 74 | document_embeddings = relationship("DocumentEmbedding", back_populates="document", foreign_keys=[DocumentEmbedding.document_hash]) 75 | def update_hash(self): # Concatenate specific attributes from the document_embeddings 
relationship 76 | hash_data = "".join([emb.filename + emb.mimetype for emb in self.document_embeddings]) 77 | self.document_hash = sha3_256(hash_data.encode('utf-8')).hexdigest() 78 | @event.listens_for(Document.document_embeddings, 'append') 79 | def update_document_hash_on_append(target, value, initiator): 80 | target.update_hash() 81 | @event.listens_for(Document.document_embeddings, 'remove') 82 | def update_document_hash_on_remove(target, value, initiator): 83 | target.update_hash() 84 | 85 | # Request/Response models start here: 86 | 87 | class EmbeddingRequest(BaseModel): 88 | text: str = "" 89 | llm_model_name: str = DEFAULT_EMBEDDING_MODEL_NAME 90 | embedding_pooling_method: str = DEFAULT_EMBEDDING_POOLING_METHOD 91 | corpus_identifier_string: str = "" 92 | 93 | class SimilarityRequest(BaseModel): 94 | text1: str = "" 95 | text2: str = "" 96 | llm_model_name: str = DEFAULT_EMBEDDING_MODEL_NAME 97 | embedding_pooling_method: str = DEFAULT_EMBEDDING_POOLING_METHOD 98 | similarity_measure: str = "all" 99 | @field_validator('similarity_measure') 100 | def validate_similarity_measure(cls, value): 101 | valid_measures = ["all", "spearman_rho", "kendall_tau", "approximate_distance_correlation", "jensen_shannon_dependency_measure", "hoeffding_d"] 102 | if value.lower() not in valid_measures: 103 | raise ValueError(f"Invalid similarity measure. Supported measures are: {', '.join(valid_measures)}") 104 | return value.lower() 105 | 106 | class SemanticSearchRequest(BaseModel): 107 | query_text: str = "" 108 | number_of_most_similar_strings_to_return: int = 10 109 | llm_model_name: str = DEFAULT_EMBEDDING_MODEL_NAME 110 | embedding_pooling_method: str = DEFAULT_EMBEDDING_POOLING_METHOD 111 | corpus_identifier_string: str = "" 112 | 113 | class SemanticSearchResponse(BaseModel): 114 | query_text: str 115 | corpus_identifier_string: str 116 | embedding_pooling_method: str 117 | results: List[dict] # List of similar strings and their similarity scores using cosine similarity with Faiss (in descending order) 118 | 119 | class AdvancedSemanticSearchRequest(BaseModel): 120 | query_text: str = "" 121 | llm_model_name: str = DEFAULT_EMBEDDING_MODEL_NAME 122 | embedding_pooling_method: str = DEFAULT_EMBEDDING_POOLING_METHOD 123 | corpus_identifier_string: str = "" 124 | similarity_filter_percentage: float = 0.01 125 | number_of_most_similar_strings_to_return: int = 10 126 | result_sorting_metric: str = "hoeffding_d" 127 | @field_validator('result_sorting_metric') 128 | def validate_similarity_measure(cls, value): 129 | valid_measures = ["spearman_rho", "kendall_tau", "approximate_distance_correlation", "jensen_shannon_dependency_measure", "hoeffding_d"] 130 | if value.lower() not in valid_measures: 131 | raise ValueError(f"Invalid similarity measure. 
Supported measures are: {', '.join(valid_measures)}")
132 |         return value.lower()
133 | 
134 | class AdvancedSemanticSearchResponse(BaseModel):
135 |     query_text: str
136 |     corpus_identifier_string: str
137 |     embedding_pooling_method: str
138 |     results: List[Dict[str, Union[str, float, Dict[str, float]]]]
139 | 
140 | class EmbeddingResponse(BaseModel):
141 |     id: int
142 |     text: str
143 |     text_hash: str
144 |     embedding_pooling_method: str
145 |     embedding_hash: str
146 |     llm_model_name: str
147 |     corpus_identifier_string: str
148 |     embedding_json: str
149 |     ip_address: Optional[str]
150 |     request_time: datetime
151 |     response_time: datetime
152 |     total_time: float
153 |     document_file_hash: Optional[str]
154 |     embedding: List[float]
155 | 
156 | class SimilarityResponse(BaseModel):
157 |     text1: str
158 |     text2: str
159 |     similarity_measure: str
160 |     embedding_pooling_method: str
161 |     similarity_score: Union[float, Dict[str, float]]  # Now can be either a float or a dictionary
162 |     embedding1: List[float]
163 |     embedding2: List[float]
164 | 
165 | class AllStringsResponse(BaseModel):
166 |     strings: List[str]
167 | 
168 | class AllDocumentsResponse(BaseModel):
169 |     documents: List[str]
170 | 
171 | class TextCompletionRequest(BaseModel):
172 |     input_prompt: str = ""
173 |     llm_model_name: str = DEFAULT_MODEL_NAME
174 |     temperature: float = DEFAULT_COMPLETION_TEMPERATURE
175 |     grammar_file_string: str = ""
176 |     number_of_tokens_to_generate: int = DEFAULT_MAX_COMPLETION_TOKENS
177 |     number_of_completions_to_generate: int = DEFAULT_NUMBER_OF_COMPLETIONS_TO_GENERATE
178 | 
179 | class TextCompletionResponse(BaseModel):
180 |     input_prompt: str
181 |     llm_model_name: str
182 |     grammar_file_string: str
183 |     number_of_tokens_to_generate: int
184 |     number_of_completions_to_generate: int
185 |     time_taken_in_seconds: float
186 |     generated_text: str
187 |     finish_reason: str
188 |     llm_model_usage_json: str
189 | 
190 | class ImageQuestionResponse(BaseModel):
191 |     question: str
192 |     llm_model_name: str
193 |     image_hash: str
194 |     time_taken_in_seconds: float
195 |     number_of_tokens_to_generate: int
196 |     number_of_completions_to_generate: int
197 | 
198 |     generated_text: str
199 |     finish_reason: str
200 |     llm_model_usage_json: str
201 | 
202 | class AudioTranscript(Base):
203 |     __tablename__ = "audio_transcripts"
204 |     audio_file_hash = Column(String, primary_key=True, index=True)
205 |     audio_file_name = Column(String, index=True)
206 |     audio_file_size_mb = Column(Float)  # File size in MB
207 |     segments_json = Column(JSON)  # Transcribed segments as JSON
208 |     combined_transcript_text = Column(String)
209 |     combined_transcript_text_list_of_metadata_dicts = Column(JSON)
210 |     info_json = Column(JSON)  # Transcription info as JSON
211 |     ip_address = Column(String)
212 |     request_time = Column(DateTime)
213 |     response_time = Column(DateTime)
214 |     total_time = Column(Float)
215 |     corpus_identifier_string = Column(String, index=True)
216 | 
217 | class AudioTranscriptResponse(BaseModel):
218 |     audio_file_hash: str
219 |     audio_file_name: str
220 |     audio_file_size_mb: float
221 |     segments_json: List[dict]
222 |     combined_transcript_text: str
223 |     combined_transcript_text_list_of_metadata_dicts: List[dict]
224 |     info_json: dict
225 |     url_to_download_zip_file_of_embeddings: str
226 |     ip_address: str
227 |     request_time: datetime
228 |     response_time: datetime
229 |     total_time: float
230 | 
231 |     llm_model_name: str
232 |     embedding_pooling_method: str
233 |     corpus_identifier_string: str
234 | 
235 | class ShowLogsIncrementalModel(BaseModel):
236 |     logs: str
237 |     last_position: int
238 | 
239 | class AddGrammarRequest(BaseModel):
240 |     bnf_grammar: str
241 |     grammar_file_name: str
242 | 
243 | class AddGrammarResponse(BaseModel):
244 |     valid_grammar_files: List[str]
245 | 
--------------------------------------------------------------------------------
/end_to_end_tests.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import httpx
 3 | import json
 4 | import os
 5 | import time
 6 | from decouple import config
 7 | from typing import List, Dict, Any
 8 | 
 9 | LLAMA_EMBEDDING_SERVER_LISTEN_PORT = config("LLAMA_EMBEDDING_SERVER_LISTEN_PORT", default=8089, cast=int)
10 | 
11 | BASE_URL = f"http://localhost:{LLAMA_EMBEDDING_SERVER_LISTEN_PORT}"
12 | DOCUMENT_PATH = "sample_input_files_for_end_to_end_tests/tale_two_cities_first_3_chapters.txt"
13 | DOCUMENT_PATH_PDF = "sample_input_files_for_end_to_end_tests/bh-us-03-sassaman-conference-slides.pdf"
14 | IMAGE_PATH = "sample_input_files_for_end_to_end_tests/sunset.jpg"
15 | AUDIO_PATH = "sample_input_files_for_end_to_end_tests/Don_King_if_he_lived_in_the_tiny_island_nation_known_as_Japan.mp3"
16 | TEXT_PROMPT = "Make up a poem about Bitcoin in the style of John Donne's 'The Canonization'."
17 | CORPUS_IDENTIFIER_STRING = "end_to_end_test"
18 | SEARCH_STRING = "equine"
19 | SEARCH_STRING_PDF = "Threat model"
20 | HTTPX_TIMEOUT_IN_SECONDS = 600
21 | USE_MANUAL_MODEL_NAME_LIST = 1
22 | MANUAL_MODEL_NAME_LIST = ["Meta-Llama-3-8B-Instruct.Q3_K_S"]
23 | 
24 | async def get_model_names() -> List[str]:
25 |     print("Requesting list of available model names...")
26 |     async with httpx.AsyncClient(timeout=HTTPX_TIMEOUT_IN_SECONDS) as client:
27 |         response = await client.get(f"{BASE_URL}/get_list_of_available_model_names/")
28 |     model_names = response.json()["model_names"]
29 |     print(f"Received model names: {model_names}")
30 |     return [name for name in model_names if "llava" not in name]
31 | 
32 | async def get_embedding_pooling_methods() -> List[str]:
33 |     pooling_methods = ['mean', 'mins_maxes', 'svd', 'svd_first_four', 'ica', 'factor_analysis', 'gaussian_random_projection']
34 |     print(f"Using embedding pooling methods: {pooling_methods}")
35 |     return pooling_methods
36 | 
37 | async def compute_document_embeddings(model_name: str, embedding_pooling_method: str) -> float:
38 |     print(f"Reading document from {DOCUMENT_PATH} for model {model_name} with pooling method {embedding_pooling_method}...")
39 |     with open(os.path.expanduser(DOCUMENT_PATH), "rb") as file:
40 |         start_time = time.time()
41 |         async with httpx.AsyncClient(timeout=HTTPX_TIMEOUT_IN_SECONDS) as client:
42 |             print(f"Sending request to compute document embeddings with model {model_name} and pooling method {embedding_pooling_method}...")
43 |             url = (
44 |                 f"{BASE_URL}/get_all_embedding_vectors_for_document/"
45 |                 f"?llm_model_name={model_name}"
46 |                 f"&embedding_pooling_method={embedding_pooling_method}"
47 |                 f"&corpus_identifier_string={CORPUS_IDENTIFIER_STRING}"
48 |                 f"&json_format=records"
49 |                 f"&send_back_json_or_zip_file=zip"
50 |             )
51 |             response = await client.post(
52 |                 url,
53 |                 files={"file": ("document.txt", file, "text/plain")},
54 |                 data={
55 |                     "url": "",
56 |                     "hash": "",
57 |                     "size": "",
58 |                 }
59 |             )
60 |             print(f"Request sent with embedding_pooling_method: {embedding_pooling_method}. 
Status code: {response.status_code}") 61 | response_json = response.json() 62 | print(f"Server response received: {response_json}") 63 | end_time = time.time() 64 | elapsed_time = end_time - start_time 65 | print(f"Document embeddings computed in {elapsed_time:.2f} seconds with pooling method {embedding_pooling_method}.") 66 | return elapsed_time 67 | 68 | async def perform_semantic_search(model_name: str, embedding_pooling_method: str) -> Dict[str, Any]: 69 | print(f"Performing semantic search for model {model_name} with pooling method {embedding_pooling_method}...") 70 | try: 71 | async with httpx.AsyncClient(timeout=HTTPX_TIMEOUT_IN_SECONDS) as client: 72 | response = await client.post( 73 | f"{BASE_URL}/search_stored_embeddings_with_query_string_for_semantic_similarity/", 74 | json={ 75 | "query_text": SEARCH_STRING, 76 | "number_of_most_similar_strings_to_return": 10, 77 | "llm_model_name": model_name, 78 | "embedding_pooling_method": embedding_pooling_method, 79 | "corpus_identifier_string": CORPUS_IDENTIFIER_STRING, 80 | } 81 | ) 82 | response.raise_for_status() # Raise an exception for HTTP errors 83 | search_results = response.json() 84 | print(f"Semantic search completed. Results: {search_results}") 85 | return search_results 86 | except httpx.HTTPStatusError as e: 87 | print(f"HTTP error occurred: {e.response.status_code} - {e.response.text}") 88 | return {"error": f"HTTP error occurred: {e.response.status_code}"} 89 | except Exception as e: 90 | print(f"An error occurred: {str(e)}") 91 | return {"error": str(e)} 92 | 93 | async def perform_advanced_semantic_search(model_name: str, embedding_pooling_method: str) -> Dict[str, Any]: 94 | print(f"Performing advanced semantic search for model {model_name} with pooling method {embedding_pooling_method}...") 95 | try: 96 | async with httpx.AsyncClient(timeout=HTTPX_TIMEOUT_IN_SECONDS) as client: 97 | response = await client.post( 98 | f"{BASE_URL}/advanced_search_stored_embeddings_with_query_string_for_semantic_similarity/", 99 | json={ 100 | "query_text": SEARCH_STRING, 101 | "llm_model_name": model_name, 102 | "embedding_pooling_method": embedding_pooling_method, 103 | "corpus_identifier_string": CORPUS_IDENTIFIER_STRING, 104 | "similarity_filter_percentage": 0.01, 105 | "number_of_most_similar_strings_to_return": 10, 106 | "result_sorting_metric": "hoeffding_d" 107 | } 108 | ) 109 | response.raise_for_status() # Raise an exception for HTTP errors 110 | advanced_search_results = response.json() 111 | print(f"Advanced semantic search completed. 
Results: {advanced_search_results}") 112 | return advanced_search_results 113 | except httpx.HTTPStatusError as e: 114 | print(f"HTTP error occurred: {e.response.status_code} - {e.response.text}") 115 | return {"error": f"HTTP error occurred: {e.response.status_code}"} 116 | except Exception as e: 117 | print(f"An error occurred: {str(e)}") 118 | return {"error": str(e)} 119 | 120 | async def generate_text_completion(input_prompt: str, model_name: str) -> Dict[str, Any]: 121 | print(f"Generating text completion for model {model_name} with prompt '{input_prompt}'...") 122 | async with httpx.AsyncClient(timeout=HTTPX_TIMEOUT_IN_SECONDS) as client: 123 | response = await client.post( 124 | f"{BASE_URL}/get_text_completions_from_input_prompt/", 125 | json={ 126 | "input_prompt": input_prompt, 127 | "llm_model_name": model_name, 128 | "temperature": 0.7, 129 | "number_of_completions_to_generate": 1, 130 | "number_of_tokens_to_generate": 150 131 | } 132 | ) 133 | completion_results = response.json() 134 | print(f"Text completion generated. Results: {completion_results}") 135 | return completion_results 136 | 137 | async def ask_question_about_image(image_path: str, question: str, model_name: str) -> Dict[str, Any]: 138 | print(f"Asking question '{question}' about image at {image_path} with model {model_name}...") 139 | with open(os.path.expanduser(image_path), "rb") as file: 140 | async with httpx.AsyncClient(timeout=HTTPX_TIMEOUT_IN_SECONDS) as client: 141 | response = await client.post( 142 | f"{BASE_URL}/ask_question_about_image/", 143 | files={"image": file}, 144 | data={ 145 | "question": question, 146 | "llm_model_name": model_name, 147 | "temperature": 0.7, 148 | "number_of_tokens_to_generate": 256, 149 | "number_of_completions_to_generate": 1 150 | } 151 | ) 152 | image_question_results = response.json() 153 | print(f"Question about image answered. Results: {image_question_results}") 154 | return image_question_results 155 | 156 | async def compute_transcript_with_whisper(audio_path: str) -> Dict[str, Any]: 157 | print(f"Computing transcript for audio file at {audio_path}...") 158 | with open(os.path.expanduser(audio_path), "rb") as file: 159 | async with httpx.AsyncClient(timeout=HTTPX_TIMEOUT_IN_SECONDS) as client: 160 | response = await client.post( 161 | f"{BASE_URL}/compute_transcript_with_whisper_from_audio/", 162 | files={"file": file}, 163 | data={ 164 | "compute_embeddings_for_resulting_transcript_document": True, 165 | "llm_model_name": config("DEFAULT_MODEL_NAME", default="Meta-Llama-3-8B-Instruct.Q3_K_S"), 166 | "embedding_pooling_method": "svd", 167 | "corpus_identifier_string": CORPUS_IDENTIFIER_STRING 168 | } 169 | ) 170 | transcript_results = response.json() 171 | print(f"Transcript computed. 
Results: {transcript_results}") 172 | return transcript_results 173 | 174 | async def main(): 175 | start_time = time.time() 176 | print("Starting the main async process...") 177 | 178 | if USE_MANUAL_MODEL_NAME_LIST: 179 | model_names = MANUAL_MODEL_NAME_LIST 180 | else: 181 | model_names = await get_model_names() 182 | embedding_pooling_methods = await get_embedding_pooling_methods() 183 | 184 | results = {} 185 | for model_name in model_names: 186 | for embedding_pooling_method in embedding_pooling_methods: 187 | print(f"\n{'_'*100}\n") 188 | print(f"Computing embeddings for model {model_name} and pooling method {embedding_pooling_method}...") 189 | total_time = await compute_document_embeddings(model_name, embedding_pooling_method) 190 | print(f"Embeddings computed in {total_time:.2f} seconds.") 191 | results[(model_name, embedding_pooling_method)] = total_time 192 | 193 | for model_name, embedding_pooling_method in results: 194 | print(f"\n{'_'*100}\n") 195 | print(f"Performing semantic search for model {model_name} and pooling method {embedding_pooling_method}...") 196 | search_results = await perform_semantic_search(model_name, embedding_pooling_method) 197 | saved_outputs_dir = "saved_outputs" 198 | if not os.path.exists(saved_outputs_dir): 199 | os.makedirs(saved_outputs_dir) 200 | filename = f"{model_name}_{embedding_pooling_method}_search_results.json" 201 | file_path = os.path.join(saved_outputs_dir, filename) 202 | with open(file_path, "w") as f: 203 | json.dump(search_results, f, indent=2) 204 | print(f"Search results saved to {file_path}.") 205 | 206 | print(f"Performing advanced semantic search for model {model_name} and pooling method {embedding_pooling_method}...") 207 | advanced_search_results = await perform_advanced_semantic_search(model_name, embedding_pooling_method) 208 | advanced_filename = f"{model_name}_{embedding_pooling_method}_advanced_search_results.json" 209 | advanced_file_path = os.path.join(saved_outputs_dir, advanced_filename) 210 | with open(advanced_file_path, "w") as f: 211 | json.dump(advanced_search_results, f, indent=2) 212 | print(f"Advanced search results saved to {advanced_file_path}.") 213 | 214 | # Test text completion 215 | for model_name in model_names: 216 | print(f"\n{'_'*100}\n") 217 | print(f"Generating text completion for model {model_name}...") 218 | completion_results = await generate_text_completion(TEXT_PROMPT, model_name) 219 | completion_file = f"{model_name}_text_completion.json" 220 | completion_file_path = os.path.join(saved_outputs_dir, completion_file) 221 | with open(completion_file_path, "w") as f: 222 | json.dump(completion_results, f, indent=2) 223 | print(f"Text completion results saved to {completion_file_path}.") 224 | 225 | # Test image question 226 | print(f"\n{'_'*100}\n") 227 | image_question_model_name = config("DEFAULT_MULTI_MODAL_MODEL_NAME", default="llava-llama-3-8b-v1_1-int4") 228 | print(f"Asking question about image with model {image_question_model_name}...") 229 | image_question_results = await ask_question_about_image(IMAGE_PATH, "What is happening in this image?", image_question_model_name) 230 | image_question_file = f"{image_question_model_name}_image_question.json" 231 | image_question_file_path = os.path.join(saved_outputs_dir, image_question_file) 232 | with open(image_question_file_path, "w") as f: 233 | json.dump(image_question_results, f, indent=2) 234 | print(f"Image question results saved to {image_question_file_path}.") 235 | 236 | # Test Whisper transcript 237 | print(f"\n{'_'*100}\n") 238 | 
print(f"Computing transcript with Whisper for audio file {AUDIO_PATH}...") 239 | transcript_results = await compute_transcript_with_whisper(AUDIO_PATH) 240 | transcript_file = "whisper_transcript.json" 241 | transcript_file_path = os.path.join(saved_outputs_dir, transcript_file) 242 | with open(transcript_file_path, "w") as f: 243 | json.dump(transcript_results, f, indent=2) 244 | print(f"Whisper transcript results saved to {transcript_file_path}.") 245 | 246 | end_time = time.time() 247 | print(f"\n{'_'*100}\n") 248 | print(f"All tests completed in {end_time - start_time:.2f} seconds.") 249 | 250 | if __name__ == "__main__": 251 | asyncio.run(main()) 252 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: swiss_army_llama_service_environment 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.11 7 | - aioredis 8 | - aioredlock 9 | - aiosqlite 10 | - apscheduler 11 | - faiss-cpu 12 | - fast_vector_similarity 13 | - fastapi 14 | - faster-whisper 15 | - filelock 16 | - httpx 17 | - llama-cpp-python 18 | - magika 19 | - mutagen 20 | - nvgpu 21 | - pandas 22 | - pillow 23 | - psutil 24 | - pydantic 25 | - PyPDF2 26 | - pytest 27 | - python-decouple 28 | - python-multipart 29 | - pytz 30 | - redis 31 | - ruff 32 | - scipy 33 | - scikit-learn 34 | - sqlalchemy 35 | - textract-py3 36 | - uvicorn 37 | - uvloop 38 | - zstandard -------------------------------------------------------------------------------- /grammar_builder.py: -------------------------------------------------------------------------------- 1 | from service_functions import validate_bnf_grammar_func 2 | from typing import List, Dict 3 | import json 4 | 5 | use_grammarbuilder_demo = 0 6 | 7 | def normalize_json(json_str): 8 | output = [] 9 | in_string = False 10 | escape_char = False 11 | for char in json_str: 12 | if char == "\\" and not escape_char: 13 | escape_char = True 14 | output.append(char) 15 | continue 16 | if char == '"' and not escape_char: 17 | in_string = not in_string 18 | if in_string: 19 | output.append(char) 20 | else: 21 | if char.strip(): 22 | output.append(char) 23 | if escape_char: 24 | escape_char = False 25 | return ''.join(output) 26 | 27 | class GrammarBuilder: 28 | type_to_bnf: Dict[str, str] = { 29 | "str": "string", 30 | "float": "number", 31 | "int": "number", 32 | "bool": "bool", 33 | "datetime": "datetime", 34 | "List": "list", 35 | "Dict": "dict", 36 | "Optional": "optional" 37 | } 38 | 39 | def __init__(self): 40 | self.rules = { 41 | "ws": "([ \\t\\n] ws)?", 42 | "string": '\\" ([^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* \\" ws', 43 | "number": '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws', 44 | "bool": "('true' | 'false') ws", 45 | "datetime": "string", 46 | "dict": "'{' ws dict_pair_list ws '}' ws", 47 | "dict_pair_list": "dict_pair (',' ws dict_pair)*", 48 | "dict_pair": "string ':' ws value ws", 49 | "list": "'[' ws list_items ws ']' ws", 50 | "list_items": "value (',' ws value)*" 51 | } 52 | 53 | 54 | def generate_bnf_from_fields(self, fields: List[str], parent="root") -> str: 55 | bnf = [] 56 | keys = ' | '.join([f'"{field.split(":")[0].strip()}"' for field in fields]) 57 | bnf.append(f"{parent} ::= '{{' ws {parent}_pair_list ws '}}' ws") 58 | bnf.append(f"{parent}_pair_list ::= {parent}_pair (',' ws {parent}_pair)*") 59 | bnf.append(f"{parent}_pair ::= allowed_keys_{parent} ':' ws value ws") 60 | bnf.append(f"allowed_keys_{parent} ::= {keys}") 61 | value_types = set() 62 | for field in fields: 63 | field_name, field_type = field.split(":") 64 | field_name, field_type = field_name.strip(), field_type.strip() 65 | parsed_type = self.type_to_bnf.get(field_type, field_type) 66 | if field_type.startswith("List"): 67 | parsed_type = "list" 68 | value_types.add(parsed_type) 69 | bnf.append(f"value ::= {' | '.join(value_types)}") 70 | return "\n".join(bnf) 71 | 72 | def pydantic_to_json_bnf(self, model_description: str) -> str: 73 | lines = model_description.strip().split('\n')[1:] 74 | fields = [line.strip() for line in lines if ':' in line] 75 | bnf_for_fields = self.generate_bnf_from_fields(fields) 76 | return f"{bnf_for_fields}\n{self.generate_base_rules()}" 77 | 78 | def generate_base_rules(self): 79 | return "\n".join([f"{key} ::= {value}" for key, value in self.rules.items()]) 80 | 81 | def generate_bnf(self, data, parent="root"): 82 | bnf = [] 83 | if isinstance(data, dict): 84 | keys = ' | '.join([f'\"{key}\"' for key in data.keys()]) 85 | bnf.append(f"{parent} ::= '{{' ws {parent}_pair_list ws '}}' ws") 86 | bnf.append(f"{parent}_pair_list ::= {parent}_pair (',' ws {parent}_pair)*") 87 | bnf.append(f"{parent}_pair ::= allowed_keys_{parent} ':' ws value ws") 88 | bnf.append(f"allowed_keys_{parent} ::= {keys}") 89 | sample_key = next(iter(data.keys())) 90 | if isinstance(data[sample_key], dict): 91 | bnf.append(f"value ::= {self.generate_bnf(data[sample_key], 'nested_value')}") 92 | elif isinstance(data, list): 93 | if len(data) > 0: 94 | sample_item = data[0] 95 | rule_name = f"{parent}_item" 96 | bnf.append(f"{parent} ::= '[' ws {rule_name} (',' ws {rule_name})* ']' ws") 97 | bnf.append(f"{rule_name} ::= {self.type_to_bnf.get(type(sample_item).__name__, type(sample_item).__name__)}") 98 | else: 99 | bnf.append(f"{parent} ::= '[' ws ']' ws") 100 | else: 101 | bnf.append(f"{parent} ::= {self.type_to_bnf.get(type(data).__name__, type(data).__name__)} ws") 102 | return "\n".join(bnf) 103 | 104 | def json_to_bnf(self, json_str): 105 | normalized_str = normalize_json(json_str) 106 | try: 107 | parsed_data = json.loads(normalized_str) 108 | except json.JSONDecodeError as e: 109 | return f"Invalid JSON: {e}" 110 | bnf_grammar = self.generate_bnf(parsed_data) 111 | return f"{bnf_grammar}\n{self.generate_base_rules()}" 112 | 113 | 114 | if use_grammarbuilder_demo: 115 | gb = GrammarBuilder() 116 | sample_json = ''' 117 | { 118 | "Optimistic": { 119 | "score": 70.0, 120 | "explanation": "The statement talks about secular industry tailwinds and expectations to grow the business at a rate exceeding global GDP." 121 | }, 122 | "Pessimistic": { 123 | "score": -20.0, 124 | "explanation": "The paragraph acknowledges that they've experienced equity losses year-to-date." 
125 | }, 126 | "Confident": { 127 | "score": 60.0, 128 | "explanation": "The text shows belief in their people, platform, and their prospect of gaining market share." 129 | }, 130 | "Cautious": { 131 | "score": 40.0, 132 | "explanation": "Mentions the possibility of falling below the target margins but aims to stay within the range." 133 | }, 134 | "Transparent": { 135 | "score": 80.0, 136 | "explanation": "Provides clear information on financial outlook, including specifics about Adjusted EBITDA." 137 | }, 138 | "Vague": { 139 | "score": -80.0, 140 | "explanation": "The text is quite specific and does not evade details." 141 | }, 142 | "Upbeat": { 143 | "score": 20.0, 144 | "explanation": "The tone is more balanced and not overtly enthusiastic." 145 | }, 146 | "Disappointed": { 147 | "score": -10.0, 148 | "explanation": "Acknowledges equity losses but doesn't express dissatisfaction." 149 | }, 150 | "Reassuring": { 151 | "score": 50.0, 152 | "explanation": "Tries to reassure by focusing on core business and tailwinds." 153 | }, 154 | "Evasive": { 155 | "score": -100.0, 156 | "explanation": "No signs of avoiding any topics; quite straightforward." 157 | }, 158 | "Committed": { 159 | "score": 60.0, 160 | "explanation": "Shows dedication to running the core business within the stated margin." 161 | }, 162 | "Analytical": { 163 | "score": 70.0, 164 | "explanation": "Provides a breakdown of the financial situation and market conditions." 165 | }, 166 | "Ambitious": { 167 | "score": 50.0, 168 | "explanation": "Talks about exceeding global GDP growth." 169 | }, 170 | "Concerned": { 171 | "score": -10.0, 172 | "explanation": "Reflects worry about equity losses but not overly so." 173 | }, 174 | "Focused": { 175 | "score": 80.0, 176 | "explanation": "Focuses on core business and previously stated margin." 177 | }, 178 | "Uncertain": { 179 | "score": -90.0, 180 | "explanation": "No ambiguity in the statements; quite specific." 181 | }, 182 | "Responsive": { 183 | "score": 60.0, 184 | "explanation": "Directly addresses the financial outlook and plans." 185 | }, 186 | "Defensive": { 187 | "score": -100.0, 188 | "explanation": "No signs of defending or justifying decisions." 189 | }, 190 | "Strategic": { 191 | "score": 60.0, 192 | "explanation": "Discusses gaining share and investment in people and platform." 193 | }, 194 | "Realistic": { 195 | "score": 40.0, 196 | "explanation": "Acknowledges challenges but maintains a balanced view." 
197 | } 198 | } 199 | ''' 200 | print('\n' + '_' * 80 + '\n') 201 | bnf_grammar = gb.json_to_bnf(sample_json) 202 | print(bnf_grammar) 203 | print('\n' + '_' * 80 + '\n') 204 | print("Validating grammar...") 205 | is_valid, validation_message = validate_bnf_grammar_func(bnf_grammar) 206 | print(validation_message) 207 | 208 | print('\n\n\n') 209 | 210 | gb = GrammarBuilder() 211 | sample_pydantic_model_description = ''' 212 | class AudioTranscriptResponse(BaseModel): 213 | audio_file_hash: str 214 | audio_file_name: str 215 | audio_file_size_mb: float 216 | segments_json: List[dict] 217 | combined_transcript_text: str 218 | combined_transcript_text_list_of_metadata_dicts: List[dict] 219 | info_json: dict 220 | url_to_download_zip_file_of_embeddings: str 221 | ip_address: str 222 | request_time: datetime 223 | response_time: datetime 224 | total_time: float 225 | ''' 226 | 227 | bnf_grammar = gb.pydantic_to_json_bnf(sample_pydantic_model_description) 228 | print(bnf_grammar) 229 | print('\n' + '_' * 80 + '\n') 230 | print("Validating grammar...") 231 | is_valid, validation_message = validate_bnf_grammar_func(bnf_grammar) 232 | print(validation_message) 233 | 234 | -------------------------------------------------------------------------------- /grammar_files/accept_or_reject.gbnf: -------------------------------------------------------------------------------- 1 | root ::= response 2 | 3 | response ::= "ACCEPT" | "REJECT" 4 | -------------------------------------------------------------------------------- /grammar_files/investor_sentiment_json.gbnf: -------------------------------------------------------------------------------- 1 | root ::= "{" ws fixed_sentiment_list ws "}" ws 2 | 3 | fixed_sentiment_list ::= 4 | fixed_sentiment_pair ("," ws fixed_sentiment_pair)* 5 | 6 | fixed_sentiment_pair ::= 7 | allowed_keys ":" ws "{" ws "score" ":" ws number "," ws "explanation" ":" ws string ws "}" 8 | 9 | allowed_keys ::= 10 | "\"Optimistic\"" | 11 | "\"Pessimistic\"" | 12 | "\"Confident\"" | 13 | "\"Cautious\"" | 14 | "\"Transparent\"" | 15 | "\"Vague\"" | 16 | "\"Upbeat\"" | 17 | "\"Disappointed\"" | 18 | "\"Reassuring\"" | 19 | "\"Evasive\"" | 20 | "\"Committed\"" | 21 | "\"Analytical\"" | 22 | "\"Ambitious\"" | 23 | "\"Concerned\"" | 24 | "\"Focused\"" | 25 | "\"Uncertain\"" | 26 | "\"Responsive\"" | 27 | "\"Defensive\"" | 28 | "\"Strategic\"" | 29 | "\"Realistic\"" 30 | 31 | string ::= 32 | "\"" ([^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* "\"" ws 33 | 34 | number ::= 35 | ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws 36 | 37 | # Optional space: by convention, applied in this grammar after literal chars when allowed 38 | ws ::= ([ \t\n] ws)? 39 | -------------------------------------------------------------------------------- /grammar_files/json.gbnf: -------------------------------------------------------------------------------- 1 | root ::= object 2 | value ::= object | array | string | number | ("true" | "false" | "null") ws 3 | 4 | object ::= 5 | "{" ws ( 6 | string ":" ws value 7 | ("," ws string ":" ws value)* 8 | )? "}" ws 9 | 10 | array ::= 11 | "[" ws ( 12 | value 13 | ("," ws value)* 14 | )? "]" ws 15 | 16 | string ::= 17 | "\"" ( 18 | [^"\\] | 19 | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes 20 | )* "\"" ws 21 | 22 | number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? 
ws 23 | 24 | # Optional space: by convention, applied in this grammar after literal chars when allowed 25 | ws ::= ([ \t\n] ws)? 26 | -------------------------------------------------------------------------------- /grammar_files/list.gbnf: -------------------------------------------------------------------------------- 1 | root ::= item+ 2 | 3 | # Excludes various line break characters 4 | item ::= "- " [^\r\n\x0b\x0c\x85\u2028\u2029]+ "\n" 5 | -------------------------------------------------------------------------------- /image_files/llama_knife_sticker.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/image_files/llama_knife_sticker.webp -------------------------------------------------------------------------------- /image_files/llama_knife_sticker2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/image_files/llama_knife_sticker2.jpg -------------------------------------------------------------------------------- /image_files/swiss_army_llama__swagger_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/image_files/swiss_army_llama__swagger_screenshot.png -------------------------------------------------------------------------------- /image_files/swiss_army_llama__swagger_screenshot_running.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/image_files/swiss_army_llama__swagger_screenshot_running.png -------------------------------------------------------------------------------- /image_files/swiss_army_llama_logo.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/image_files/swiss_army_llama_logo.webp -------------------------------------------------------------------------------- /install_swiss_army_llama.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | echo "________________________________________________" 6 | echo "Stage 1: Checking for pyenv and installing if not present" 7 | echo "________________________________________________" 8 | 9 | # Check for pyenv and install if not present 10 | if ! command -v pyenv &> /dev/null; then 11 | echo "pyenv not found, installing dependencies..." 12 | sudo apt-get update 13 | sudo apt-get install -y build-essential libssl-dev zlib1g-dev libbz2-dev \ 14 | libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \ 15 | xz-utils tk-dev libffi-dev liblzma-dev python3-openssl git 16 | 17 | echo "Cloning pyenv repository..." 
18 | git clone https://github.com/pyenv/pyenv.git ~/.pyenv 19 | else 20 | echo "pyenv is already installed" 21 | fi 22 | 23 | echo "________________________________________________" 24 | echo "Stage 2: Configuring pyenv in shell" 25 | echo "________________________________________________" 26 | 27 | # Detect default shell and add pyenv to the shell configuration file if not already present 28 | DEFAULT_SHELL=$(basename "$SHELL") 29 | if [ "$DEFAULT_SHELL" = "zsh" ]; then 30 | CONFIG_FILE="$HOME/.zshrc" 31 | else 32 | CONFIG_FILE="$HOME/.bashrc" 33 | fi 34 | 35 | if ! grep -q 'export PYENV_ROOT="$HOME/.pyenv"' "$CONFIG_FILE"; then 36 | echo "Adding pyenv configuration to $CONFIG_FILE" 37 | echo 'export PYENV_ROOT="$HOME/.pyenv"' >> "$CONFIG_FILE" 38 | echo 'export PATH="$PYENV_ROOT/bin:$PATH"' >> "$CONFIG_FILE" 39 | echo 'eval "$(pyenv init --path)"' >> "$CONFIG_FILE" 40 | 41 | if [ "$DEFAULT_SHELL" = "zsh" ]; then 42 | zsh -c "source $CONFIG_FILE" 43 | else 44 | bash -c "source $CONFIG_FILE" 45 | fi 46 | else 47 | echo "pyenv configuration already present in $CONFIG_FILE" 48 | if [ "$DEFAULT_SHELL" = "zsh" ]; then 49 | zsh -c "source $CONFIG_FILE" 50 | else 51 | bash -c "source $CONFIG_FILE" 52 | fi 53 | fi 54 | 55 | echo "________________________________________________" 56 | echo "Stage 3: Backing up and preparing requirements.txt" 57 | echo "________________________________________________" 58 | 59 | # Back up requirements.txt if not already backed up 60 | if [ ! -f requirements_original.txt ]; then 61 | echo "Backing up requirements.txt to requirements_original.txt" 62 | cp requirements.txt requirements_original.txt 63 | else 64 | echo "Restoring requirements.txt from requirements_original.txt" 65 | cp requirements_original.txt requirements.txt 66 | fi 67 | 68 | echo "________________________________________________" 69 | echo "Stage 4: Updating pyenv and installing Python 3.12" 70 | echo "________________________________________________" 71 | 72 | cd ~/.pyenv && git pull && cd - 73 | echo "Installing Python 3.12 with pyenv" 74 | pyenv install -f 3.12 75 | 76 | echo "________________________________________________" 77 | echo "Stage 5: Installing additional dependencies" 78 | echo "________________________________________________" 79 | 80 | sudo apt-get update 81 | sudo apt-get install -y libxml2-dev libxslt1-dev antiword unrtf poppler-utils pstotext tesseract-ocr flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig redis-server 82 | 83 | echo "Enabling and starting Redis server" 84 | sudo systemctl enable redis-server 85 | sudo systemctl start redis 86 | 87 | echo "________________________________________________" 88 | echo "Stage 6: Detecting CUDA and updating requirements.txt" 89 | echo "________________________________________________" 90 | 91 | if command -v nvidia-smi &> /dev/null; then 92 | CUDA_VERSION=$(nvidia-smi | grep -oP "CUDA Version: \K[0-9.]+") 93 | echo "CUDA detected, version: $CUDA_VERSION" 94 | case $CUDA_VERSION in 95 | 12.1*) CUDA_TAG="cu121" ;; 96 | 12.2*) CUDA_TAG="cu122" ;; 97 | 12.3*) CUDA_TAG="cu123" ;; 98 | 12.4*) CUDA_TAG="cu124" ;; 99 | *) CUDA_TAG="" ;; 100 | esac 101 | 102 | if [ -n "$CUDA_TAG" ]; then 103 | echo "Updating requirements.txt for CUDA version $CUDA_TAG" 104 | sed -i 's@llama-cpp-python@llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/'"$CUDA_TAG"'@' requirements.txt 105 | echo "Setting up Faiss for GPU" 106 | export FAISS_ENABLE_GPU=ON 107 | pip install --no-binary :all: faiss-cpu 108 | fi 109 | else 
110 | echo "CUDA not detected" 111 | fi 112 | 113 | echo "________________________________________________" 114 | echo "Stage 7: Setting up Python environment" 115 | echo "________________________________________________" 116 | 117 | pyenv local 3.12 118 | echo "Creating virtual environment" 119 | python -m venv venv 120 | echo "Activating virtual environment" 121 | source venv/bin/activate 122 | echo "Upgrading pip, setuptools, and wheel" 123 | python -m pip install --upgrade pip setuptools wheel 124 | echo "Installing dependencies from requirements.txt" 125 | pip install -r requirements.txt 126 | 127 | echo "________________________________________________" 128 | echo "Installation complete" 129 | echo "________________________________________________" 130 | -------------------------------------------------------------------------------- /log_viewer_functions.py: -------------------------------------------------------------------------------- 1 | import re 2 | import logging 3 | import html 4 | from datetime import datetime, timedelta 5 | from pytz import timezone 6 | 7 | log_file_path = 'swiss_army_llama.log' 8 | 9 | def safe_highlight_func(text, pattern, replacement): 10 | try: 11 | return re.sub(pattern, replacement, text) 12 | except Exception as e: 13 | logging.warning(f"Failed to apply highlight rule: {e}") 14 | return text 15 | 16 | 17 | def highlight_rules_func(text): 18 | rules = [ 19 | (re.compile(r"\b(success\w*)\b", re.IGNORECASE), '#COLOR1_OPEN#', '#COLOR1_CLOSE#'), 20 | (re.compile(r"\b(error|fail\w*)\b", re.IGNORECASE), '#COLOR2_OPEN#', '#COLOR2_CLOSE#'), 21 | (re.compile(r"\b(pending)\b", re.IGNORECASE), '#COLOR3_OPEN#', '#COLOR3_CLOSE#'), 22 | (re.compile(r"\b(response)\b", re.IGNORECASE), '#COLOR4_OPEN#', '#COLOR4_CLOSE#'), 23 | (re.compile(r'\"(.*?)\"', re.IGNORECASE), '#COLOR5_OPEN#', '#COLOR5_CLOSE#'), 24 | (re.compile(r"\'(.*?)\'", re.IGNORECASE), "#COLOR6_OPEN#", '#COLOR6_CLOSE#'), 25 | (re.compile(r"\`(.*?)\`", re.IGNORECASE), '#COLOR7_OPEN#', '#COLOR7_CLOSE#'), 26 | (re.compile(r"\b(https?://\S+)\b", re.IGNORECASE), '#COLOR8_OPEN#', '#COLOR8_CLOSE#'), 27 | (re.compile(r"\b(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2},\d{3})\b", re.IGNORECASE), '#COLOR9_OPEN#', '#COLOR9_CLOSE#'), 28 | (re.compile(r"\b(_{100,})\b", re.IGNORECASE), '#COLOR10_OPEN#', '#COLOR10_CLOSE#'), 29 | (re.compile(r"\b(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+)\b", re.IGNORECASE), '#COLOR11_OPEN#', '#COLOR11_CLOSE#'), 30 | (re.compile(r"\b([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})\b", re.IGNORECASE), '#COLOR12_OPEN#', '#COLOR12_CLOSE#'), 31 | (re.compile(r"\b([a-f0-9]{64})\b", re.IGNORECASE), '#COLOR13_OPEN#', '#COLOR13_CLOSE#') 32 | ] 33 | for pattern, replacement_open, replacement_close in rules: 34 | text = pattern.sub(f"{replacement_open}\\1{replacement_close}", text) 35 | text = html.escape(text) 36 | text = text.replace('#COLOR1_OPEN#', '').replace('#COLOR1_CLOSE#', '') 37 | text = text.replace('#COLOR2_OPEN#', '').replace('#COLOR2_CLOSE#', '') 38 | text = text.replace('#COLOR3_OPEN#', '').replace('#COLOR3_CLOSE#', '') 39 | text = text.replace('#COLOR4_OPEN#', '').replace('#COLOR4_CLOSE#', '') 40 | text = text.replace('#COLOR5_OPEN#', '').replace('#COLOR5_CLOSE#', '') 41 | text = text.replace('#COLOR6_OPEN#', "").replace('#COLOR6_CLOSE#', '') 42 | text = text.replace('#COLOR7_OPEN#', '').replace('#COLOR7_CLOSE#', '') 43 | text = text.replace('#COLOR8_OPEN#', '').replace('#COLOR8_CLOSE#', '') 44 | text = text.replace('#COLOR9_OPEN#', '').replace('#COLOR9_CLOSE#', '') 45 | text = 
text.replace('#COLOR10_OPEN#', '').replace('#COLOR10_CLOSE#', '') 46 | text = text.replace('#COLOR11_OPEN#', '').replace('#COLOR11_CLOSE#', '') 47 | text = text.replace('#COLOR12_OPEN#', '').replace('#COLOR12_CLOSE#', '') 48 | text = text.replace('#COLOR13_OPEN#', '').replace('#COLOR13_CLOSE#', '') 49 | return text 50 | 51 | def show_logs_incremental_func(minutes: int, last_position: int): 52 | new_logs = [] 53 | now = datetime.now(timezone('UTC')) # get current time, make it timezone-aware 54 | with open(log_file_path, "r") as f: 55 | f.seek(last_position) # seek to `last_position` 56 | while True: 57 | line = f.readline() 58 | if line == "": # if EOF 59 | break 60 | if line.strip() == "": 61 | continue 62 | try: # Try to parse the datetime 63 | log_datetime_str = line.split(" - ")[0] # assuming the datetime is at the start of each line 64 | log_datetime = datetime.strptime(log_datetime_str, "%Y-%m-%d %H:%M:%S,%f") # parse the datetime string to a datetime object 65 | log_datetime = log_datetime.replace(tzinfo=timezone('UTC')) # set the datetime object timezone to UTC to match `now` 66 | if now - log_datetime > timedelta(minutes=minutes): # if the log is older than `minutes` minutes from now 67 | continue # ignore the log and continue with the next line 68 | except ValueError: 69 | pass # If the line does not start with a datetime, ignore the ValueError and process the line anyway 70 | new_logs.append(highlight_rules_func(line.rstrip('\n'))) # add the highlighted log to the list and strip any newline at the end 71 | last_position = f.tell() # get the last position 72 | new_logs_as_string = "
".join(new_logs) # joining with
directly 73 | return {"logs": new_logs_as_string, "last_position": last_position} # also return the last position 74 | 75 | 76 | def show_logs_func(minutes: int = 5): 77 | with open(log_file_path, "r") as f: 78 | lines = f.readlines() 79 | logs = [] 80 | now = datetime.now(timezone('UTC')) 81 | for line in lines: 82 | if line.strip() == "": 83 | continue 84 | try: 85 | log_datetime_str = line.split(" - ")[0] 86 | log_datetime = datetime.strptime(log_datetime_str, "%Y-%m-%d %H:%M:%S,%f") 87 | log_datetime = log_datetime.replace(tzinfo=timezone('UTC')) 88 | if now - log_datetime <= timedelta(minutes=minutes): 89 | logs.append(highlight_rules_func(line.rstrip('\n'))) 90 | except (ValueError, IndexError): 91 | logs.append(highlight_rules_func(line.rstrip('\n'))) # Line didn't meet datetime parsing criteria, continue with processing 92 | logs_as_string = "
".join(logs) 93 | logs_as_string_newlines_rendered = logs_as_string.replace("\n", "
") 94 | logs_as_string_newlines_rendered_font_specified = """ 95 | 96 | 97 | 98 | 99 | 170 | 171 | 220 | 221 |

222 |     
223 |     
224 |     
225 |     """.format(logs_as_string_newlines_rendered, minutes)
226 |     return logs_as_string_newlines_rendered_font_specified
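
show_logs_incremental_func returns both the highlighted lines and the byte offset where reading stopped, so a caller can tail the log by feeding last_position back in on each call. A minimal sketch of such a polling loop, assuming the module above is importable; the 5-minute window and 2-second interval are arbitrary choices, not values from this repo:

    import time
    from log_viewer_functions import show_logs_incremental_func

    last_position = 0
    while True:
        result = show_logs_incremental_func(minutes=5, last_position=last_position)
        if result["logs"]:
            print(result["logs"].replace("<br>", "\n"))  # the function joins lines with <br> for HTML display
        last_position = result["last_position"]  # resume from the last byte offset read
        time.sleep(2)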


--------------------------------------------------------------------------------
/logger_config.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import shutil
 4 | import queue
 5 | from logging.handlers import RotatingFileHandler, QueueHandler, QueueListener
 6 | 
 7 | logger = logging.getLogger("swiss_army_llama")
 8 | 
 9 | def setup_logger():
10 |     if logger.handlers:
11 |         return logger
12 |     old_logs_dir = 'old_logs'
13 |     if not os.path.exists(old_logs_dir):
14 |         os.makedirs(old_logs_dir)
15 |     logger.setLevel(logging.INFO)
16 |     formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
17 |     log_file_path = 'swiss_army_llama.log'
18 |     log_queue = queue.Queue(-1)  # Create a queue for the handlers
19 |     fh = RotatingFileHandler(log_file_path, maxBytes=10*1024*1024, backupCount=5)
20 |     fh.setFormatter(formatter)
21 |     def namer(default_log_name):  # Function to move rotated logs to the old_logs directory
22 |         return os.path.join(old_logs_dir, os.path.basename(default_log_name))
23 |     def rotator(source, dest):
24 |         shutil.move(source, dest)
25 |     fh.namer = namer
26 |     fh.rotator = rotator
27 |     sh = logging.StreamHandler()  # Stream handler
28 |     sh.setFormatter(formatter)
29 |     queue_handler = QueueHandler(log_queue)  # Create QueueHandler
30 |     queue_handler.setFormatter(formatter)
31 |     logger.addHandler(queue_handler)
32 |     listener = QueueListener(log_queue, fh, sh)  # Drain the queue to both the rotating file handler and the stream handler; without fh here, nothing would ever be written to the log file that log_viewer_functions.py reads
33 |     listener.start()
34 |     logging.getLogger('sqlalchemy.engine').setLevel(logging.WARNING)  # Configure SQLAlchemy logging
35 |     return logger
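
setup_logger() routes records through a QueueHandler so that logging calls never block on handler I/O; the QueueListener drains the queue on a background thread. A self-contained sketch of that same pattern, with illustrative names that are not from this repo:

    import logging
    import queue
    from logging.handlers import QueueHandler, QueueListener

    q = queue.Queue(-1)  # unbounded queue, as in setup_logger()
    demo_logger = logging.getLogger("queue_demo")
    demo_logger.setLevel(logging.INFO)
    demo_logger.addHandler(QueueHandler(q))  # producers only ever touch the queue

    console = logging.StreamHandler()  # the real, potentially slow handler
    listener = QueueListener(q, console)
    listener.start()
    demo_logger.info("handled off the calling thread")
    listener.stop()  # flush remaining records and join the listener thread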


--------------------------------------------------------------------------------
/misc_utility_functions.py:
--------------------------------------------------------------------------------
  1 | from logger_config import setup_logger
  2 | from embeddings_data_models import TextEmbedding
  3 | import socket
  4 | import os
  5 | import re
  6 | import json
  7 | import io
  8 | import glob
  9 | import redis
 10 | import sys
 11 | import threading
 12 | import numpy as np
 13 | import faiss
 14 | import base64
 15 | from typing import Optional
 16 | from pathlib import Path
 17 | from typing import Any
 18 | from database_functions import AsyncSessionLocal
 19 | from sqlalchemy import select
 20 | from collections import defaultdict
 21 | from PIL import Image
 22 | from decouple import config
 23 | 
 24 | logger = setup_logger()
 25 | USE_RAMDISK = config("USE_RAMDISK", default=False, cast=bool)
 26 | RAMDISK_PATH = config("RAMDISK_PATH", default="/mnt/ramdisk", cast=str)
 27 | BASE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
 28 | 
 29 | class suppress_stdout_stderr(object):  # Context manager that silences OS-level stdout/stderr (fds 1 and 2), e.g. llama.cpp's C-level output that redirecting sys.stdout alone cannot catch
 30 |     def __enter__(self):
 31 |         self.outnull_file = open(os.devnull, 'w')
 32 |         self.errnull_file = open(os.devnull, 'w')
 33 |         self.old_stdout_fileno_undup = sys.stdout.fileno()
 34 |         self.old_stderr_fileno_undup = sys.stderr.fileno()
 35 |         self.old_stdout_fileno = os.dup(sys.stdout.fileno())  # duplicate the real fds so they can be restored on exit
 36 |         self.old_stderr_fileno = os.dup(sys.stderr.fileno())
 37 |         self.old_stdout = sys.stdout
 38 |         self.old_stderr = sys.stderr
 39 |         os.dup2(self.outnull_file.fileno(), self.old_stdout_fileno_undup)  # point fds 1/2 at /dev/null
 40 |         os.dup2(self.errnull_file.fileno(), self.old_stderr_fileno_undup)
 41 |         sys.stdout = self.outnull_file
 42 |         sys.stderr = self.errnull_file
 43 |         return self
 44 | 
 45 |     def __exit__(self, *_):
 46 |         sys.stdout = self.old_stdout
 47 |         sys.stderr = self.old_stderr
 48 |         os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)  # restore the original fds
 49 |         os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
 50 |         os.close(self.old_stdout_fileno)
 51 |         os.close(self.old_stderr_fileno)
 52 |         self.outnull_file.close()
 53 |         self.errnull_file.close()
 54 |     
 55 | def safe_path(base_path, file_name):
 56 |     abs_base_path = os.path.abspath(base_path)
 57 |     abs_user_path = os.path.abspath(os.path.join(base_path, file_name))
 58 |     return abs_user_path.startswith(abs_base_path), abs_user_path
 59 | 
 60 | def clean_filename_for_url_func(dirty_filename: str) -> str:
 61 |     clean_filename = re.sub(r'[^\w\s]', '', dirty_filename)  # Remove special characters
 62 |     clean_filename = clean_filename.replace(' ', '_')  # Replace spaces with underscores
 63 |     return clean_filename
 64 | 
 65 | def is_redis_running(host='localhost', port=6379):
 66 |     s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 67 |     try:
 68 |         s.connect((host, port))
 69 |         return True
 70 |     except ConnectionRefusedError:
 71 |         return False
 72 |     finally:
 73 |         s.close()
 74 |         
 75 | def start_redis_server():
 76 |     try:
 77 |         result = os.system("sudo service redis-server start")
 78 |         if result == 0:
 79 |             print("Redis server started successfully.")
 80 |         else:
 81 |             logger.error(f"Failed to start Redis server, return code: {result}")
 82 |             raise Exception("Failed to start Redis server.")
 83 |     except Exception as e:
 84 |         logger.error(f"Failed to start Redis server: {e}")
 85 |         raise
 86 | 
 87 | def restart_redis_server():
 88 |     try:
 89 |         result = os.system("sudo service redis-server stop")
 90 |         if result != 0:
 91 |             logger.warning(f"Failed to stop Redis server, it might not be running. Return code: {result}")
 92 |         result = os.system("sudo service redis-server start")
 93 |         if result == 0:
 94 |             print("Redis server started successfully.")
 95 |         else:
 96 |             logger.error(f"Failed to start Redis server, return code: {result}")
 97 |             raise Exception("Failed to start Redis server.")
 98 |     except Exception as e:
 99 |         logger.error(f"Failed to restart Redis server: {e}")
100 |         raise
101 | 
102 | def configure_redis_optimally(redis_host='localhost', redis_port=6379, maxmemory='1gb'):
103 |     configured_file = 'redis_configured.txt'
104 |     if os.path.exists(configured_file):
105 |         print("Redis has already been configured. Skipping configuration.")
106 |         return
107 |     if not is_redis_running(redis_host, redis_port):
108 |         start_redis_server()
109 |     r = redis.StrictRedis(host=redis_host, port=redis_port, decode_responses=True)
110 |     output = []
111 |     def set_config(key, value):
112 |         try:
113 |             response = r.config_set(key, value)
114 |             msg = f"Successfully set {key} to {value}" if response else f"Failed to set {key} to {value}"
115 |             output.append(msg)
116 |             print(msg)
117 |         except redis.exceptions.ConnectionError as e:
118 |             logger.error(f"Failed to set config {key}: {e}")
119 |             raise
120 |     set_config('maxmemory', maxmemory)
121 |     set_config('maxmemory-policy', 'allkeys-lru')
 122 |     max_clients = min((os.cpu_count() or 1) * 1000, 50000)  # os.cpu_count() can return None, so guard before multiplying
123 |     set_config('maxclients', max_clients)
124 |     set_config('timeout', 300)
125 |     set_config('save', '900 1 300 10 60 10000')
126 |     set_config('appendonly', 'yes')
127 |     set_config('appendfsync', 'everysec')
128 |     set_config('stop-writes-on-bgsave-error', 'no')
129 |     output.append("Redis configuration optimized successfully.")
130 |     output.append("Restarting Redis server to apply changes...")
131 |     with open(configured_file, 'w') as f:
132 |         f.write("\n".join(output))
133 |     print("\n".join(output))
134 |     restart_redis_server()
135 |     
136 | def configure_redis_in_background():
137 |     threading.Thread(target=configure_redis_optimally).start()
138 |     
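
To confirm what configure_redis_optimally() actually applied, the same settings can be read back with redis-py's CONFIG GET. A small sketch, assuming the function's default host and port:

    import redis

    r = redis.StrictRedis(host='localhost', port=6379, decode_responses=True)
    for key in ('maxmemory', 'maxmemory-policy', 'maxclients', 'appendonly'):
        print(key, '=', r.config_get(key)[key])  # config_get returns a {key: value} dict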
139 | async def build_faiss_indexes(force_rebuild=False):
140 |     global faiss_indexes, associated_texts_by_model_and_pooling_method
141 |     if os.environ.get("FAISS_SETUP_DONE") == "1" and not force_rebuild:
142 |         return faiss_indexes, associated_texts_by_model_and_pooling_method
143 |     faiss_indexes = {}
144 |     associated_texts_by_model_and_pooling_method = defaultdict(lambda: defaultdict(list))  # Create a nested dictionary to store associated texts by model name and pooling method
145 |     async with AsyncSessionLocal() as session:
146 |         result = await session.execute(select(TextEmbedding.llm_model_name, TextEmbedding.text, TextEmbedding.embedding_json, TextEmbedding.embedding_pooling_method))
147 |         embeddings_by_model_and_pooling = defaultdict(lambda: defaultdict(list))
148 |         for row in result.fetchall():  # Process regular embeddings
149 |             llm_model_name = row[0]
150 |             embedding_pooling_method = row[3]
151 |             associated_texts_by_model_and_pooling_method[llm_model_name][embedding_pooling_method].append(row[1])  # Store the associated text by model name and pooling method
152 |             embeddings_by_model_and_pooling[llm_model_name][embedding_pooling_method].append((row[1], json.loads(row[2])))
153 |         for llm_model_name, embeddings_by_pooling in embeddings_by_model_and_pooling.items():
154 |             for embedding_pooling_method, embeddings in embeddings_by_pooling.items():
155 |                 logger.info(f"Building Faiss index over embeddings for model {llm_model_name} with pooling method {embedding_pooling_method}...")
156 |                 embeddings_array = np.array([e[1] for e in embeddings]).astype('float32')
157 |                 if embeddings_array.size == 0:
158 |                     logger.error(f"No embeddings were loaded from the database for model {llm_model_name} with pooling method {embedding_pooling_method}, so nothing to build the Faiss index with!")
159 |                     continue
160 |                 faiss.normalize_L2(embeddings_array)  # Normalize the vectors for cosine similarity
161 |                 faiss_index = faiss.IndexFlatIP(embeddings_array.shape[1])  # Use IndexFlatIP for cosine similarity
162 |                 faiss_index.add(embeddings_array)
163 |                 faiss_indexes[(llm_model_name, embedding_pooling_method)] = faiss_index  # Store the index by model name and pooling method
164 |     os.environ["FAISS_SETUP_DONE"] = "1"
165 |     return faiss_indexes, associated_texts_by_model_and_pooling_method
166 | 
167 | def normalize_logprobs(avg_logprob, min_logprob, max_logprob):
168 |     range_logprob = max_logprob - min_logprob
169 |     return (avg_logprob - min_logprob) / range_logprob if range_logprob != 0 else 0.5
170 | 
171 | def truncate_string(s: str, max_length: int = 100) -> str:
172 |     return s[:max_length]
173 | 
174 | def remove_pagination_breaks(text: str) -> str:
175 |     text = re.sub(r'-(\n)(?=[a-z])', '', text) # Remove hyphens at the end of lines when the word continues on the next line
176 |     text = re.sub(r'(?<=\w)(?...', ' ', text)  # [garbled in this dump: the rest of this regex, which re-joins lines broken mid-sentence, was stripped, along with lines 177-248 (the end of this function plus the JSONAggregator class's constructor and weighted_vote() helper used below)]
249 |     def flatten_json(json_obj, parent_key='', sep='->'):
250 |         items = {}
251 |         for k, v in json_obj.items():
252 |             new_key = f"{parent_key}{sep}{k}" if parent_key else k
253 |             if isinstance(v, dict):
254 |                 items.update(JSONAggregator.flatten_json(v, new_key, sep=sep))
255 |             else:
256 |                 items[new_key] = v
257 |         return items
258 | 
259 |     @staticmethod
260 |     def get_value_by_path(json_obj, path, sep='->'):
261 |         keys = path.split(sep)
262 |         item = json_obj
263 |         for k in keys:
264 |             item = item[k]
265 |         return item
266 | 
267 |     @staticmethod
268 |     def set_value_by_path(json_obj, path, value, sep='->'):
269 |         keys = path.split(sep)
270 |         item = json_obj
271 |         for k in keys[:-1]:
272 |             item = item.setdefault(k, {})
273 |         item[keys[-1]] = value
274 | 
275 |     def calculate_path_weights(self):
276 |         all_paths = []
277 |         for j in self.completions:
278 |             all_paths += list(self.flatten_json(j).keys())
279 |         path_weights = defaultdict(float)
280 |         for path in all_paths:
281 |             path_weights[path] += 1.0
282 |         return path_weights
283 | 
284 |     def aggregate(self):
285 |         path_weights = self.calculate_path_weights()
286 |         aggregate = {}
287 |         for path, weight in path_weights.items():
288 |             values = [self.get_value_by_path(j, path) for j in self.completions if path in self.flatten_json(j)]
289 |             weights = [weight] * len(values)
290 |             aggregate_value = self.weighted_vote(values, weights)
291 |             self.set_value_by_path(aggregate, path, aggregate_value)
292 |         self.aggregate_result = aggregate
293 | 
294 | class FakeUploadFile:
295 |     def __init__(self, filename: str, content: Any, content_type: str = 'text/plain'):
296 |         self.filename = filename
297 |         self.content_type = content_type
298 |         self.file = io.BytesIO(content)
299 |     def read(self, size: int = -1) -> bytes:
300 |         return self.file.read(size)
301 |     def seek(self, offset: int, whence: int = 0) -> int:
302 |         return self.file.seek(offset, whence)
303 |     def tell(self) -> int:
304 |         return self.file.tell()
305 |     
306 | def process_image(image_path, max_dimension=1024):
307 |     original_path = Path(image_path)
308 |     processed_image_path = original_path.with_stem(original_path.stem + "_processed").with_suffix(original_path.suffix)
309 |     with Image.open(image_path) as img:
310 |         img.thumbnail((max_dimension, max_dimension), Image.LANCZOS)
311 |         img.save(processed_image_path)
312 |     return processed_image_path
313 | 
314 | def alpha_remover_func(img):
315 |     if img.mode != 'RGBA':
316 |         return img
317 |     canvas = Image.new('RGBA', img.size, (255, 255, 255, 255))
318 |     canvas.paste(img, mask=img)
319 |     return canvas.convert('RGB')
320 | 
321 | def image_to_base64_data_uri(file_path):
322 |     with open(file_path, "rb") as img_file:
323 |         base64_data = base64.b64encode(img_file.read()).decode('utf-8')
324 |         return f"data:image/png;base64,{base64_data}"    
325 |     
326 | def find_clip_model_path(llm_model_name: str) -> Optional[str]:
327 |     models_dir = os.path.join(RAMDISK_PATH, 'models') if USE_RAMDISK else os.path.join(BASE_DIRECTORY, 'models')
328 |     base_name = os.path.splitext(os.path.basename(llm_model_name))[0]
329 |     mmproj_model_name = base_name.replace("-f16", "-mmproj-f16").replace("-int4", "-mmproj-f16")
330 |     mmproj_files = glob.glob(os.path.join(models_dir, f"{mmproj_model_name}.gguf"))
331 |     if not mmproj_files:
332 |         logger.error(f"No mmproj file found matching: {mmproj_model_name}")
333 |         return None
334 |     return mmproj_files[0]    
335 | 
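
Because build_faiss_indexes() L2-normalizes the stored vectors and adds them to an IndexFlatIP, the inner products returned by a search are cosine similarities. A sketch of querying one of the returned indexes; the (model, pooling) key below is a hypothetical example, and query_embedding must come from the same model and pooling method that built the index:

    import numpy as np
    import faiss
    from misc_utility_functions import build_faiss_indexes

    async def most_similar_texts(query_embedding, k=5):
        faiss_indexes, associated_texts = await build_faiss_indexes()
        key = ('nomic-embed-text-v1.5.Q6_K', 'mean')  # hypothetical (llm_model_name, embedding_pooling_method) key
        index = faiss_indexes[key]
        query = np.asarray([query_embedding], dtype='float32')
        faiss.normalize_L2(query)  # normalize the query so inner product equals cosine similarity
        scores, ids = index.search(query, k)
        texts = associated_texts[key[0]][key[1]]  # texts were appended in the same order the vectors were added
        return [(texts[i], float(s)) for i, s in zip(ids[0], scores[0]) if i != -1]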


--------------------------------------------------------------------------------
/model_urls.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B-GGUF/resolve/main/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf",
 3 |   "https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf",
 4 |   "https://huggingface.co/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2-GGUF/resolve/main/Llama-3.1-8B-Lexi-Uncensored_V2_Q5.gguf",
 5 |   "https://huggingface.co/vonjack/bge-m3-gguf/resolve/main/bge-m3-q8_0.gguf",
 6 |   "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.Q6_K.gguf",
 7 |   "https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-gguf/resolve/main/llava-llama-3-8b-v1_1-mmproj-f16.gguf",
 8 |   "https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-gguf/resolve/main/llava-llama-3-8b-v1_1-int4.gguf"
 9 | ]
10 | 
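
One way these URLs might be fetched (an illustrative sketch; the repo's actual download logic, including its models/download.lock file, is not shown in this excerpt):

    import json
    import os
    import urllib.request

    with open('model_urls.json') as f:
        urls = json.load(f)

    os.makedirs('models', exist_ok=True)
    for url in urls:
        dest = os.path.join('models', os.path.basename(url))
        if not os.path.exists(dest):
            urllib.request.urlretrieve(url, dest)  # blocking download of one GGUF file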


--------------------------------------------------------------------------------
/models/download.lock:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/models/download.lock


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.ruff]
2 | ignore = ["E501"]


--------------------------------------------------------------------------------
/ramdisk_functions.py:
--------------------------------------------------------------------------------
 1 | from logger_config import setup_logger
 2 | import os
 3 | import subprocess
 4 | import shutil
 5 | import psutil
 6 | from decouple import config
 7 | logger = setup_logger()
 8 | 
 9 | RAMDISK_PATH = config("RAMDISK_PATH", default="/mnt/ramdisk", cast=str)
10 | RAMDISK_SIZE_IN_GB = config("RAMDISK_SIZE_IN_GB", default=1, cast=int)
11 | 
12 | def check_that_user_has_required_permissions_to_manage_ramdisks():
13 |     try:  # Run a harmless command with non-interactive sudo (-n fails instead of prompting, so a server process cannot hang waiting for a password) to test for password-less sudo permissions
14 |         result = subprocess.run(["sudo", "-n", "true"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
15 |         if result.returncode != 0 or "password" in result.stderr.lower():
16 |             raise PermissionError("Password required for sudo")
17 |         logger.info("User has sufficient permissions to manage RAM Disks.")
18 |         return True
19 |     except (PermissionError, subprocess.CalledProcessError) as e:
20 |         logger.info("Sorry, current user does not have sufficient permissions to manage RAM Disks! Disabling RAM Disks for now...")
21 |         logger.debug(f"Permission check error detail: {e}")
22 |         return False
23 |     
24 | def setup_ramdisk():
25 |     if os.environ.get("RAMDISK_SETUP_DONE") == "1":
26 |         logger.info("RAM Disk setup already completed by another worker. Skipping.")
27 |         return    
28 |     cmd_check = f"sudo mount | grep {RAMDISK_PATH}" # Check if RAM disk already exists at the path
29 |     result = subprocess.run(cmd_check, shell=True, stdout=subprocess.PIPE).stdout.decode('utf-8')
30 |     if RAMDISK_PATH in result:
31 |         logger.info(f"RAM Disk already set up at {RAMDISK_PATH}. Skipping setup.")
32 |         return
33 |     total_ram_gb = psutil.virtual_memory().total / (1024 ** 3)
34 |     free_ram_gb = psutil.virtual_memory().free / (1024 ** 3)
35 |     buffer_gb = 2  # buffer to ensure we don't use all the free RAM
36 |     ramdisk_size_gb = max(min(RAMDISK_SIZE_IN_GB, free_ram_gb - buffer_gb), 0.1)
37 |     ramdisk_size_mb = int(ramdisk_size_gb * 1024)
38 |     ramdisk_size_str = f"{ramdisk_size_mb}M"
39 |     logger.info(f"Total RAM: {total_ram_gb}G")
40 |     logger.info(f"Free RAM: {free_ram_gb}G")
41 |     logger.info(f"Calculated RAM Disk Size: {ramdisk_size_gb}G")
42 |     if RAMDISK_SIZE_IN_GB > total_ram_gb:
43 |         raise ValueError(f"Cannot allocate {RAMDISK_SIZE_IN_GB}G for RAM Disk. Total system RAM is {total_ram_gb:.2f}G.")
44 |     logger.info("Setting up RAM Disk...")
45 |     os.makedirs(RAMDISK_PATH, exist_ok=True)
46 |     mount_command = ["sudo", "mount", "-t", "tmpfs", "-o", f"size={ramdisk_size_str}", "tmpfs", RAMDISK_PATH]
47 |     subprocess.run(mount_command, check=True)
48 |     logger.info(f"RAM Disk set up at {RAMDISK_PATH} with size {ramdisk_size_gb}G")
49 |     os.environ["RAMDISK_SETUP_DONE"] = "1"
50 | 
51 | def copy_models_to_ramdisk(models_directory, ramdisk_directory):
52 |     total_size = sum(os.path.getsize(os.path.join(models_directory, model)) for model in os.listdir(models_directory))
53 |     free_ram = psutil.virtual_memory().free
54 |     if total_size > free_ram:
55 |         logger.warning(f"Not enough space on RAM Disk. Required: {total_size}, Available: {free_ram}. Rebuilding RAM Disk.")
56 |         clear_ramdisk()
57 |         free_ram = psutil.virtual_memory().free  # Recompute the available RAM after clearing the RAM disk
58 |         if total_size > free_ram:
59 |             logger.error(f"Still not enough space on RAM Disk even after clearing. Required: {total_size}, Available: {free_ram}.")
60 |             raise ValueError("Not enough RAM space to copy models.")
61 |     os.makedirs(ramdisk_directory, exist_ok=True)
62 |     for model in os.listdir(models_directory):
63 |         src_path = os.path.join(models_directory, model)
64 |         dest_path = os.path.join(ramdisk_directory, model)
65 |         if os.path.exists(dest_path) and os.path.getsize(dest_path) == os.path.getsize(src_path): # Check if the file already exists in the RAM disk and has the same size
66 |             logger.info(f"Model {model} already exists in RAM Disk and is the same size. Skipping copy.")
67 |             continue
68 |         shutil.copyfile(src_path, dest_path)
69 |         logger.info(f"Copied model {model} to RAM Disk at {dest_path}")
70 | 
71 | def clear_ramdisk():
72 |     while True:
73 |         cmd_check = f"sudo mount | grep {RAMDISK_PATH}"
74 |         result = subprocess.run(cmd_check, shell=True, stdout=subprocess.PIPE).stdout.decode('utf-8')
75 |         if RAMDISK_PATH not in result:
76 |             break  # Exit the loop if the RAMDISK_PATH is not in the mount list
77 |         cmd_umount = f"sudo umount -l {RAMDISK_PATH}"
78 |         subprocess.run(cmd_umount, shell=True, check=True)
79 |     logger.info(f"Cleared RAM Disk at {RAMDISK_PATH}")


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | aioredis
 2 | aioredlock
 3 | aiosqlite
 4 | apscheduler
 5 | faiss-cpu
 6 | fast_vector_similarity
 7 | fastapi
 8 | faster-whisper
 9 | filelock
10 | httpx
11 | llama-cpp-python
12 | magika
13 | mutagen
14 | nvgpu
15 | pandas
16 | pillow
17 | psutil
18 | pydantic
19 | PyPDF2
20 | pytest
21 | python-decouple
22 | python-multipart
23 | pytz
24 | redis
25 | ruff
26 | scipy
27 | scikit-learn
28 | sqlalchemy
29 | textract-py3
30 | uvicorn
31 | uvloop
32 | zstandard


--------------------------------------------------------------------------------
/sample_input_files_for_end_to_end_tests/Don_King_if_he_lived_in_the_tiny_island_nation_known_as_Japan.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/sample_input_files_for_end_to_end_tests/Don_King_if_he_lived_in_the_tiny_island_nation_known_as_Japan.mp3


--------------------------------------------------------------------------------
/sample_input_files_for_end_to_end_tests/bh-us-03-sassaman-conference-slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/sample_input_files_for_end_to_end_tests/bh-us-03-sassaman-conference-slides.pdf


--------------------------------------------------------------------------------
/sample_input_files_for_end_to_end_tests/sunset.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/sample_input_files_for_end_to_end_tests/sunset.jpg


--------------------------------------------------------------------------------
/sample_input_files_for_end_to_end_tests/tale_two_cities_first_3_chapters.txt:
--------------------------------------------------------------------------------
  1 | Book the First--Recalled to Life
  2 | 
  3 | 
  4 | 
  5 | 
  6 | CHAPTER I.
  7 | The Period
  8 | 
  9 | 
 10 | It was the best of times, it was the worst of times, it was the age of
 11 | wisdom, it was the age of foolishness, it was the epoch of belief, it
 12 | was the epoch of incredulity, it was the season of Light, it was the
 13 | season of Darkness, it was the spring of hope, it was the winter of
 14 | despair, we had everything before us, we had nothing before us, we were
 15 | all going direct to Heaven, we were all going direct the other way--in
 16 | short, the period was so far like the present period, that some of its
 17 | noisiest authorities insisted on its being received, for good or for
 18 | evil, in the superlative degree of comparison only.
 19 | 
 20 | There were a king with a large jaw and a queen with a plain face, on the
 21 | throne of England; there were a king with a large jaw and a queen with
 22 | a fair face, on the throne of France. In both countries it was clearer
 23 | than crystal to the lords of the State preserves of loaves and fishes,
 24 | that things in general were settled for ever.
 25 | 
 26 | It was the year of Our Lord one thousand seven hundred and seventy-five.
 27 | Spiritual revelations were conceded to England at that favoured period,
 28 | as at this. Mrs. Southcott had recently attained her five-and-twentieth
 29 | blessed birthday, of whom a prophetic private in the Life Guards had
 30 | heralded the sublime appearance by announcing that arrangements were
 31 | made for the swallowing up of London and Westminster. Even the Cock-lane
 32 | ghost had been laid only a round dozen of years, after rapping out its
 33 | messages, as the spirits of this very year last past (supernaturally
 34 | deficient in originality) rapped out theirs. Mere messages in the
 35 | earthly order of events had lately come to the English Crown and People,
 36 | from a congress of British subjects in America: which, strange
 37 | to relate, have proved more important to the human race than any
 38 | communications yet received through any of the chickens of the Cock-lane
 39 | brood.
 40 | 
 41 | France, less favoured on the whole as to matters spiritual than her
 42 | sister of the shield and trident, rolled with exceeding smoothness down
 43 | hill, making paper money and spending it. Under the guidance of her
 44 | Christian pastors, she entertained herself, besides, with such humane
 45 | achievements as sentencing a youth to have his hands cut off, his tongue
 46 | torn out with pincers, and his body burned alive, because he had not
 47 | kneeled down in the rain to do honour to a dirty procession of monks
 48 | which passed within his view, at a distance of some fifty or sixty
 49 | yards. It is likely enough that, rooted in the woods of France and
 50 | Norway, there were growing trees, when that sufferer was put to death,
 51 | already marked by the Woodman, Fate, to come down and be sawn into
 52 | boards, to make a certain movable framework with a sack and a knife in
 53 | it, terrible in history. It is likely enough that in the rough outhouses
 54 | of some tillers of the heavy lands adjacent to Paris, there were
 55 | sheltered from the weather that very day, rude carts, bespattered with
 56 | rustic mire, snuffed about by pigs, and roosted in by poultry, which
 57 | the Farmer, Death, had already set apart to be his tumbrils of
 58 | the Revolution. But that Woodman and that Farmer, though they work
 59 | unceasingly, work silently, and no one heard them as they went about
 60 | with muffled tread: the rather, forasmuch as to entertain any suspicion
 61 | that they were awake, was to be atheistical and traitorous.
 62 | 
 63 | In England, there was scarcely an amount of order and protection to
 64 | justify much national boasting. Daring burglaries by armed men, and
 65 | highway robberies, took place in the capital itself every night;
 66 | families were publicly cautioned not to go out of town without removing
 67 | their furniture to upholsterers’ warehouses for security; the highwayman
 68 | in the dark was a City tradesman in the light, and, being recognised and
 69 | challenged by his fellow-tradesman whom he stopped in his character of
 70 | “the Captain,” gallantly shot him through the head and rode away; the
 71 | mail was waylaid by seven robbers, and the guard shot three dead, and
 72 | then got shot dead himself by the other four, “in consequence of the
 73 | failure of his ammunition:” after which the mail was robbed in peace;
 74 | that magnificent potentate, the Lord Mayor of London, was made to stand
 75 | and deliver on Turnham Green, by one highwayman, who despoiled the
 76 | illustrious creature in sight of all his retinue; prisoners in London
 77 | gaols fought battles with their turnkeys, and the majesty of the law
 78 | fired blunderbusses in among them, loaded with rounds of shot and ball;
 79 | thieves snipped off diamond crosses from the necks of noble lords at
 80 | Court drawing-rooms; musketeers went into St. Giles’s, to search
 81 | for contraband goods, and the mob fired on the musketeers, and the
 82 | musketeers fired on the mob, and nobody thought any of these occurrences
 83 | much out of the common way. In the midst of them, the hangman, ever busy
 84 | and ever worse than useless, was in constant requisition; now, stringing
 85 | up long rows of miscellaneous criminals; now, hanging a housebreaker on
 86 | Saturday who had been taken on Tuesday; now, burning people in the
 87 | hand at Newgate by the dozen, and now burning pamphlets at the door of
 88 | Westminster Hall; to-day, taking the life of an atrocious murderer,
 89 | and to-morrow of a wretched pilferer who had robbed a farmer’s boy of
 90 | sixpence.
 91 | 
 92 | All these things, and a thousand like them, came to pass in and close
 93 | upon the dear old year one thousand seven hundred and seventy-five.
 94 | Environed by them, while the Woodman and the Farmer worked unheeded,
 95 | those two of the large jaws, and those other two of the plain and the
 96 | fair faces, trod with stir enough, and carried their divine rights
 97 | with a high hand. Thus did the year one thousand seven hundred
 98 | and seventy-five conduct their Greatnesses, and myriads of small
 99 | creatures--the creatures of this chronicle among the rest--along the
100 | roads that lay before them.
101 | 
102 | 
103 | 
104 | 
105 | CHAPTER II.
106 | The Mail
107 | 
108 | 
109 | It was the Dover road that lay, on a Friday night late in November,
110 | before the first of the persons with whom this history has business.
111 | The Dover road lay, as to him, beyond the Dover mail, as it lumbered up
112 | Shooter’s Hill. He walked up hill in the mire by the side of the mail,
113 | as the rest of the passengers did; not because they had the least relish
114 | for walking exercise, under the circumstances, but because the hill,
115 | and the harness, and the mud, and the mail, were all so heavy, that the
116 | horses had three times already come to a stop, besides once drawing the
117 | coach across the road, with the mutinous intent of taking it back
118 | to Blackheath. Reins and whip and coachman and guard, however, in
119 | combination, had read that article of war which forbade a purpose
120 | otherwise strongly in favour of the argument, that some brute animals
121 | are endued with Reason; and the team had capitulated and returned to
122 | their duty.
123 | 
124 | With drooping heads and tremulous tails, they mashed their way through
125 | the thick mud, floundering and stumbling between whiles, as if they were
126 | falling to pieces at the larger joints. As often as the driver rested
127 | them and brought them to a stand, with a wary “Wo-ho! so-ho-then!” the
128 | near leader violently shook his head and everything upon it--like an
129 | unusually emphatic horse, denying that the coach could be got up the
130 | hill. Whenever the leader made this rattle, the passenger started, as a
131 | nervous passenger might, and was disturbed in mind.
132 | 
133 | There was a steaming mist in all the hollows, and it had roamed in its
134 | forlornness up the hill, like an evil spirit, seeking rest and finding
135 | none. A clammy and intensely cold mist, it made its slow way through the
136 | air in ripples that visibly followed and overspread one another, as the
137 | waves of an unwholesome sea might do. It was dense enough to shut out
138 | everything from the light of the coach-lamps but these its own workings,
139 | and a few yards of road; and the reek of the labouring horses steamed
140 | into it, as if they had made it all.
141 | 
142 | Two other passengers, besides the one, were plodding up the hill by the
143 | side of the mail. All three were wrapped to the cheekbones and over the
144 | ears, and wore jack-boots. Not one of the three could have said, from
145 | anything he saw, what either of the other two was like; and each was
146 | hidden under almost as many wrappers from the eyes of the mind, as from
147 | the eyes of the body, of his two companions. In those days, travellers
148 | were very shy of being confidential on a short notice, for anybody on
149 | the road might be a robber or in league with robbers. As to the latter,
150 | when every posting-house and ale-house could produce somebody in
151 | “the Captain’s” pay, ranging from the landlord to the lowest stable
152 | non-descript, it was the likeliest thing upon the cards. So the guard
153 | of the Dover mail thought to himself, that Friday night in November, one
154 | thousand seven hundred and seventy-five, lumbering up Shooter’s Hill, as
155 | he stood on his own particular perch behind the mail, beating his feet,
156 | and keeping an eye and a hand on the arm-chest before him, where a
157 | loaded blunderbuss lay at the top of six or eight loaded horse-pistols,
158 | deposited on a substratum of cutlass.
159 | 
160 | The Dover mail was in its usual genial position that the guard suspected
161 | the passengers, the passengers suspected one another and the guard, they
162 | all suspected everybody else, and the coachman was sure of nothing but
163 | the horses; as to which cattle he could with a clear conscience have
164 | taken his oath on the two Testaments that they were not fit for the
165 | journey.
166 | 
167 | “Wo-ho!” said the coachman. “So, then! One more pull and you’re at the
168 | top and be damned to you, for I have had trouble enough to get you to
169 | it!--Joe!”
170 | 
171 | “Halloa!” the guard replied.
172 | 
173 | “What o’clock do you make it, Joe?”
174 | 
175 | “Ten minutes, good, past eleven.”
176 | 
177 | “My blood!” ejaculated the vexed coachman, “and not atop of Shooter’s
178 | yet! Tst! Yah! Get on with you!”
179 | 
180 | The emphatic horse, cut short by the whip in a most decided negative,
181 | made a decided scramble for it, and the three other horses followed
182 | suit. Once more, the Dover mail struggled on, with the jack-boots of its
183 | passengers squashing along by its side. They had stopped when the coach
184 | stopped, and they kept close company with it. If any one of the three
185 | had had the hardihood to propose to another to walk on a little ahead
186 | into the mist and darkness, he would have put himself in a fair way of
187 | getting shot instantly as a highwayman.
188 | 
189 | The last burst carried the mail to the summit of the hill. The horses
190 | stopped to breathe again, and the guard got down to skid the wheel for
191 | the descent, and open the coach-door to let the passengers in.
192 | 
193 | “Tst! Joe!” cried the coachman in a warning voice, looking down from his
194 | box.
195 | 
196 | “What do you say, Tom?”
197 | 
198 | They both listened.
199 | 
200 | “I say a horse at a canter coming up, Joe.”
201 | 
202 | “_I_ say a horse at a gallop, Tom,” returned the guard, leaving his hold
203 | of the door, and mounting nimbly to his place. “Gentlemen! In the king’s
204 | name, all of you!”
205 | 
206 | With this hurried adjuration, he cocked his blunderbuss, and stood on
207 | the offensive.
208 | 
209 | The passenger booked by this history, was on the coach-step, getting in;
210 | the two other passengers were close behind him, and about to follow. He
211 | remained on the step, half in the coach and half out of; they remained
212 | in the road below him. They all looked from the coachman to the guard,
213 | and from the guard to the coachman, and listened. The coachman looked
214 | back and the guard looked back, and even the emphatic leader pricked up
215 | his ears and looked back, without contradicting.
216 | 
217 | The stillness consequent on the cessation of the rumbling and labouring
218 | of the coach, added to the stillness of the night, made it very quiet
219 | indeed. The panting of the horses communicated a tremulous motion to
220 | the coach, as if it were in a state of agitation. The hearts of the
221 | passengers beat loud enough perhaps to be heard; but at any rate, the
222 | quiet pause was audibly expressive of people out of breath, and holding
223 | the breath, and having the pulses quickened by expectation.
224 | 
225 | The sound of a horse at a gallop came fast and furiously up the hill.
226 | 
227 | “So-ho!” the guard sang out, as loud as he could roar. “Yo there! Stand!
228 | I shall fire!”
229 | 
230 | The pace was suddenly checked, and, with much splashing and floundering,
231 | a man’s voice called from the mist, “Is that the Dover mail?”
232 | 
233 | “Never you mind what it is!” the guard retorted. “What are you?”
234 | 
235 | “_Is_ that the Dover mail?”
236 | 
237 | “Why do you want to know?”
238 | 
239 | “I want a passenger, if it is.”
240 | 
241 | “What passenger?”
242 | 
243 | “Mr. Jarvis Lorry.”
244 | 
245 | Our booked passenger showed in a moment that it was his name. The guard,
246 | the coachman, and the two other passengers eyed him distrustfully.
247 | 
248 | “Keep where you are,” the guard called to the voice in the mist,
249 | “because, if I should make a mistake, it could never be set right in
250 | your lifetime. Gentleman of the name of Lorry answer straight.”
251 | 
252 | “What is the matter?” asked the passenger, then, with mildly quavering
253 | speech. “Who wants me? Is it Jerry?”
254 | 
255 | (“I don’t like Jerry’s voice, if it is Jerry,” growled the guard to
256 | himself. “He’s hoarser than suits me, is Jerry.”)
257 | 
258 | “Yes, Mr. Lorry.”
259 | 
260 | “What is the matter?”
261 | 
262 | “A despatch sent after you from over yonder. T. and Co.”
263 | 
264 | “I know this messenger, guard,” said Mr. Lorry, getting down into the
265 | road--assisted from behind more swiftly than politely by the other two
266 | passengers, who immediately scrambled into the coach, shut the door, and
267 | pulled up the window. “He may come close; there’s nothing wrong.”
268 | 
269 | “I hope there ain’t, but I can’t make so ‘Nation sure of that,” said the
270 | guard, in gruff soliloquy. “Hallo you!”
271 | 
272 | “Well! And hallo you!” said Jerry, more hoarsely than before.
273 | 
274 | “Come on at a footpace! d’ye mind me? And if you’ve got holsters to that
275 | saddle o’ yourn, don’t let me see your hand go nigh ’em. For I’m a devil
276 | at a quick mistake, and when I make one it takes the form of Lead. So
277 | now let’s look at you.”
278 | 
279 | The figures of a horse and rider came slowly through the eddying mist,
280 | and came to the side of the mail, where the passenger stood. The rider
281 | stooped, and, casting up his eyes at the guard, handed the passenger
282 | a small folded paper. The rider’s horse was blown, and both horse and
283 | rider were covered with mud, from the hoofs of the horse to the hat of
284 | the man.
285 | 
286 | “Guard!” said the passenger, in a tone of quiet business confidence.
287 | 
288 | The watchful guard, with his right hand at the stock of his raised
289 | blunderbuss, his left at the barrel, and his eye on the horseman,
290 | answered curtly, “Sir.”
291 | 
292 | “There is nothing to apprehend. I belong to Tellson’s Bank. You must
293 | know Tellson’s Bank in London. I am going to Paris on business. A crown
294 | to drink. I may read this?”
295 | 
296 | “If so be as you’re quick, sir.”
297 | 
298 | He opened it in the light of the coach-lamp on that side, and
299 | read--first to himself and then aloud: “‘Wait at Dover for Mam’selle.’
300 | It’s not long, you see, guard. Jerry, say that my answer was, RECALLED
301 | TO LIFE.”
302 | 
303 | Jerry started in his saddle. “That’s a Blazing strange answer, too,”
304 |  said he, at his hoarsest.
305 | 
306 | “Take that message back, and they will know that I received this, as
307 | well as if I wrote. Make the best of your way. Good night.”
308 | 
309 | With those words the passenger opened the coach-door and got in; not at
310 | all assisted by his fellow-passengers, who had expeditiously secreted
311 | their watches and purses in their boots, and were now making a general
312 | pretence of being asleep. With no more definite purpose than to escape
313 | the hazard of originating any other kind of action.
314 | 
315 | The coach lumbered on again, with heavier wreaths of mist closing round
316 | it as it began the descent. The guard soon replaced his blunderbuss
317 | in his arm-chest, and, having looked to the rest of its contents, and
318 | having looked to the supplementary pistols that he wore in his belt,
319 | looked to a smaller chest beneath his seat, in which there were a
320 | few smith’s tools, a couple of torches, and a tinder-box. For he was
321 | furnished with that completeness that if the coach-lamps had been blown
322 | and stormed out, which did occasionally happen, he had only to shut
323 | himself up inside, keep the flint and steel sparks well off the straw,
324 | and get a light with tolerable safety and ease (if he were lucky) in
325 | five minutes.
326 | 
327 | “Tom!” softly over the coach roof.
328 | 
329 | “Hallo, Joe.”
330 | 
331 | “Did you hear the message?”
332 | 
333 | “I did, Joe.”
334 | 
335 | “What did you make of it, Tom?”
336 | 
337 | “Nothing at all, Joe.”
338 | 
339 | “That’s a coincidence, too,” the guard mused, “for I made the same of it
340 | myself.”
341 | 
342 | Jerry, left alone in the mist and darkness, dismounted meanwhile, not
343 | only to ease his spent horse, but to wipe the mud from his face, and
344 | shake the wet out of his hat-brim, which might be capable of
345 | holding about half a gallon. After standing with the bridle over his
346 | heavily-splashed arm, until the wheels of the mail were no longer within
347 | hearing and the night was quite still again, he turned to walk down the
348 | hill.
349 | 
350 | “After that there gallop from Temple Bar, old lady, I won’t trust your
351 | fore-legs till I get you on the level,” said this hoarse messenger,
352 | glancing at his mare. “‘Recalled to life.’ That’s a Blazing strange
353 | message. Much of that wouldn’t do for you, Jerry! I say, Jerry! You’d
354 | be in a Blazing bad way, if recalling to life was to come into fashion,
355 | Jerry!”
356 | 
357 | 
358 | 
359 | 
360 | CHAPTER III.
361 | The Night Shadows
362 | 
363 | 
364 | A wonderful fact to reflect upon, that every human creature is
365 | constituted to be that profound secret and mystery to every other. A
366 | solemn consideration, when I enter a great city by night, that every
367 | one of those darkly clustered houses encloses its own secret; that every
368 | room in every one of them encloses its own secret; that every beating
369 | heart in the hundreds of thousands of breasts there, is, in some of
370 | its imaginings, a secret to the heart nearest it! Something of the
371 | awfulness, even of Death itself, is referable to this. No more can I
372 | turn the leaves of this dear book that I loved, and vainly hope in time
373 | to read it all. No more can I look into the depths of this unfathomable
374 | water, wherein, as momentary lights glanced into it, I have had glimpses
375 | of buried treasure and other things submerged. It was appointed that the
376 | book should shut with a spring, for ever and for ever, when I had read
377 | but a page. It was appointed that the water should be locked in an
378 | eternal frost, when the light was playing on its surface, and I stood
379 | in ignorance on the shore. My friend is dead, my neighbour is dead,
380 | my love, the darling of my soul, is dead; it is the inexorable
381 | consolidation and perpetuation of the secret that was always in that
382 | individuality, and which I shall carry in mine to my life’s end. In
383 | any of the burial-places of this city through which I pass, is there
384 | a sleeper more inscrutable than its busy inhabitants are, in their
385 | innermost personality, to me, or than I am to them?
386 | 
387 | As to this, his natural and not to be alienated inheritance, the
388 | messenger on horseback had exactly the same possessions as the King, the
389 | first Minister of State, or the richest merchant in London. So with the
390 | three passengers shut up in the narrow compass of one lumbering old mail
391 | coach; they were mysteries to one another, as complete as if each had
392 | been in his own coach and six, or his own coach and sixty, with the
393 | breadth of a county between him and the next.
394 | 
395 | The messenger rode back at an easy trot, stopping pretty often at
396 | ale-houses by the way to drink, but evincing a tendency to keep his
397 | own counsel, and to keep his hat cocked over his eyes. He had eyes that
398 | assorted very well with that decoration, being of a surface black, with
399 | no depth in the colour or form, and much too near together--as if they
400 | were afraid of being found out in something, singly, if they kept too
401 | far apart. They had a sinister expression, under an old cocked-hat like
402 | a three-cornered spittoon, and over a great muffler for the chin and
403 | throat, which descended nearly to the wearer’s knees. When he stopped
404 | for drink, he moved this muffler with his left hand, only while he
405 | poured his liquor in with his right; as soon as that was done, he
406 | muffled again.
407 | 
408 | “No, Jerry, no!” said the messenger, harping on one theme as he rode.
409 | “It wouldn’t do for you, Jerry. Jerry, you honest tradesman, it wouldn’t
410 | suit _your_ line of business! Recalled--! Bust me if I don’t think he’d
411 | been a drinking!”
412 | 
413 | His message perplexed his mind to that degree that he was fain, several
414 | times, to take off his hat to scratch his head. Except on the crown,
415 | which was raggedly bald, he had stiff, black hair, standing jaggedly all
416 | over it, and growing down hill almost to his broad, blunt nose. It was
417 | so like Smith’s work, so much more like the top of a strongly spiked
418 | wall than a head of hair, that the best of players at leap-frog might
419 | have declined him, as the most dangerous man in the world to go over.
420 | 
421 | While he trotted back with the message he was to deliver to the night
422 | watchman in his box at the door of Tellson’s Bank, by Temple Bar, who
423 | was to deliver it to greater authorities within, the shadows of the
424 | night took such shapes to him as arose out of the message, and took such
425 | shapes to the mare as arose out of _her_ private topics of uneasiness.
426 | They seemed to be numerous, for she shied at every shadow on the road.
427 | 
428 | What time, the mail-coach lumbered, jolted, rattled, and bumped upon
429 | its tedious way, with its three fellow-inscrutables inside. To whom,
430 | likewise, the shadows of the night revealed themselves, in the forms
431 | their dozing eyes and wandering thoughts suggested.
432 | 
433 | Tellson’s Bank had a run upon it in the mail. As the bank
434 | passenger--with an arm drawn through the leathern strap, which did what
435 | lay in it to keep him from pounding against the next passenger,
436 | and driving him into his corner, whenever the coach got a special
437 | jolt--nodded in his place, with half-shut eyes, the little
438 | coach-windows, and the coach-lamp dimly gleaming through them, and the
439 | bulky bundle of opposite passenger, became the bank, and did a great
440 | stroke of business. The rattle of the harness was the chink of money,
441 | and more drafts were honoured in five minutes than even Tellson’s, with
442 | all its foreign and home connection, ever paid in thrice the time. Then
443 | the strong-rooms underground, at Tellson’s, with such of their valuable
444 | stores and secrets as were known to the passenger (and it was not a
445 | little that he knew about them), opened before him, and he went in among
446 | them with the great keys and the feebly-burning candle, and found them
447 | safe, and strong, and sound, and still, just as he had last seen them.
448 | 
449 | But, though the bank was almost always with him, and though the coach
450 | (in a confused way, like the presence of pain under an opiate) was
451 | always with him, there was another current of impression that never
452 | ceased to run, all through the night. He was on his way to dig some one
453 | out of a grave.
454 | 
455 | Now, which of the multitude of faces that showed themselves before him
456 | was the true face of the buried person, the shadows of the night did
457 | not indicate; but they were all the faces of a man of five-and-forty by
458 | years, and they differed principally in the passions they expressed,
459 | and in the ghastliness of their worn and wasted state. Pride, contempt,
460 | defiance, stubbornness, submission, lamentation, succeeded one another;
461 | so did varieties of sunken cheek, cadaverous colour, emaciated hands
462 | and figures. But the face was in the main one face, and every head was
463 | prematurely white. A hundred times the dozing passenger inquired of this
464 | spectre:
465 | 
466 | “Buried how long?”
467 | 
468 | The answer was always the same: “Almost eighteen years.”
469 | 
470 | “You had abandoned all hope of being dug out?”
471 | 
472 | “Long ago.”
473 | 
474 | “You know that you are recalled to life?”
475 | 
476 | “They tell me so.”
477 | 
478 | “I hope you care to live?”
479 | 
480 | “I can’t say.”
481 | 
482 | “Shall I show her to you? Will you come and see her?”
483 | 
484 | The answers to this question were various and contradictory. Sometimes
485 | the broken reply was, “Wait! It would kill me if I saw her too soon.”
486 |  Sometimes, it was given in a tender rain of tears, and then it was,
487 | “Take me to her.” Sometimes it was staring and bewildered, and then it
488 | was, “I don’t know her. I don’t understand.”
489 | 
490 | After such imaginary discourse, the passenger in his fancy would dig,
491 | and dig, dig--now with a spade, now with a great key, now with his
492 | hands--to dig this wretched creature out. Got out at last, with earth
493 | hanging about his face and hair, he would suddenly fan away to dust. The
494 | passenger would then start to himself, and lower the window, to get the
495 | reality of mist and rain on his cheek.
496 | 
497 | Yet even when his eyes were opened on the mist and rain, on the moving
498 | patch of light from the lamps, and the hedge at the roadside retreating
499 | by jerks, the night shadows outside the coach would fall into the train
500 | of the night shadows within. The real Banking-house by Temple Bar, the
501 | real business of the past day, the real strong rooms, the real express
502 | sent after him, and the real message returned, would all be there. Out
503 | of the midst of them, the ghostly face would rise, and he would accost
504 | it again.
505 | 
506 | “Buried how long?”
507 | 
508 | “Almost eighteen years.”
509 | 
510 | “I hope you care to live?”
511 | 
512 | “I can’t say.”
513 | 
514 | Dig--dig--dig--until an impatient movement from one of the two
515 | passengers would admonish him to pull up the window, draw his arm
516 | securely through the leathern strap, and speculate upon the two
517 | slumbering forms, until his mind lost its hold of them, and they again
518 | slid away into the bank and the grave.
519 | 
520 | “Buried how long?”
521 | 
522 | “Almost eighteen years.”
523 | 
524 | “You had abandoned all hope of being dug out?”
525 | 
526 | “Long ago.”
527 | 
528 | The words were still in his hearing as just spoken--distinctly in
529 | his hearing as ever spoken words had been in his life--when the weary
530 | passenger started to the consciousness of daylight, and found that the
531 | shadows of the night were gone.
532 | 
533 | He lowered the window, and looked out at the rising sun. There was a
534 | ridge of ploughed land, with a plough upon it where it had been left
535 | last night when the horses were unyoked; beyond, a quiet coppice-wood,
536 | in which many leaves of burning red and golden yellow still remained
537 | upon the trees. Though the earth was cold and wet, the sky was clear,
538 | and the sun rose bright, placid, and beautiful.
539 | 
540 | “Eighteen years!” said the passenger, looking at the sun. “Gracious
541 | Creator of day! To be buried alive for eighteen years!”


--------------------------------------------------------------------------------
/setup_dockerized_app_on_fresh_machine.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Update system packages
 4 | echo "Updating system packages..."
 5 | sudo apt-get update
 6 | 
 7 | # Install Docker
 8 | echo "Installing Docker..."
 9 | sudo apt-get install docker.io -y
10 | 
11 | # Start Docker service
12 | echo "Starting Docker service..."
13 | sudo systemctl start docker
14 | 
15 | # Display Docker version
16 | echo "Checking Docker version..."
17 | sudo docker --version
18 | 
19 | # Add the current user to the docker group
20 | echo "Adding current user to the Docker group..."
21 | sudo usermod -aG docker $USER  # Group membership takes effect at the next login; the commands below therefore keep using sudo
22 | 
23 | # Build the Docker image
24 | echo "Building the Docker image..."
25 | arch=$(uname -m)
26 | base_image="ubuntu:latest"
27 | 
28 | if [ "$arch" = "x86_64" ]; then
29 |   echo "Building for x86_64..."
30 |   sudo docker build --build-arg BASE_IMAGE=$base_image --build-arg ARCH="amd64" -t swiss-army-llama .
31 | elif [ "$arch" = "aarch64" ]; then
32 |   echo "Building for aarch64..."
33 |   sudo docker build --build-arg BASE_IMAGE=$base_image --build-arg ARCH="arm64" -t swiss-army-llama .
34 | else
35 |   echo "Unsupported architecture."
36 |   exit 1
37 | fi
38 | 
39 | 
40 | # Run the Docker container
41 | echo "Running the Docker container..."
42 | sudo docker run -e TERM="$TERM" -p 8089:8089 swiss-army-llama
43 | 
44 | echo "Script completed!"
45 | 
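
Once the script finishes, the API should answer on the mapped port 8089. A minimal
verification sketch in Python, assuming FastAPI's default /docs route is enabled:

    import time
    import urllib.request

    # Poll the container's mapped port until the app answers.
    for _ in range(30):
        try:
            with urllib.request.urlopen("http://localhost:8089/docs", timeout=2) as resp:
                print(f"Server is up (HTTP {resp.status})")
                break
        except OSError:
            time.sleep(2)
    else:
        print("Server did not respond within ~60 seconds")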


--------------------------------------------------------------------------------
/shared_resources.py:
--------------------------------------------------------------------------------
  1 | from misc_utility_functions import is_redis_running, start_redis_server, build_faiss_indexes
  2 | from database_functions import DatabaseWriter, initialize_db, AsyncSessionLocal, delete_expired_rows
  3 | from ramdisk_functions import setup_ramdisk, copy_models_to_ramdisk, check_that_user_has_required_permissions_to_manage_ramdisks
  4 | from logger_config import setup_logger
  5 | from aioredlock import Aioredlock
  6 | import aioredis
  7 | import asyncio
  8 | import urllib.request
  9 | import os
 10 | import glob
 11 | import json
 12 | from filelock import FileLock, Timeout
 13 | import traceback
 14 | import llama_cpp
 15 | from typing import List, Tuple, Dict
 16 | from decouple import config
 17 | from fastapi import HTTPException
 18 | from apscheduler.schedulers.asyncio import AsyncIOScheduler
 19 | try:
 20 |     import nvgpu
 21 |     GPU_AVAILABLE = True
 22 | except ImportError:
 23 |     GPU_AVAILABLE = False
 24 | logger = setup_logger()
 25 | 
 26 | embedding_model_cache = {} # Model cache to store loaded models
 27 | text_completion_model_cache = {} # Model cache to store loaded text completion models
 28 | 
 29 | SWISS_ARMY_LLAMA_SERVER_LISTEN_PORT = config("SWISS_ARMY_LLAMA_SERVER_LISTEN_PORT", default=8089, cast=int)
 30 | DEFAULT_MODEL_NAME = config("DEFAULT_MODEL_NAME", default="openchat_v3.2_super", cast=str) 
 31 | LLM_CONTEXT_SIZE_IN_TOKENS = config("LLM_CONTEXT_SIZE_IN_TOKENS", default=512, cast=int)
 32 | TEXT_COMPLETION_CONTEXT_SIZE_IN_TOKENS = config("TEXT_COMPLETION_CONTEXT_SIZE_IN_TOKENS", default=4000, cast=int)
 33 | DEFAULT_MAX_COMPLETION_TOKENS = config("DEFAULT_MAX_COMPLETION_TOKENS", default=100, cast=int)
 34 | DEFAULT_NUMBER_OF_COMPLETIONS_TO_GENERATE = config("DEFAULT_NUMBER_OF_COMPLETIONS_TO_GENERATE", default=4, cast=int)
 35 | DEFAULT_COMPLETION_TEMPERATURE = config("DEFAULT_COMPLETION_TEMPERATURE", default=0.7, cast=float)
 36 | MINIMUM_STRING_LENGTH_FOR_DOCUMENT_EMBEDDING = config("MINIMUM_STRING_LENGTH_FOR_DOCUMENT_EMBEDDING", default=15, cast=int)
 37 | USE_PARALLEL_INFERENCE_QUEUE = config("USE_PARALLEL_INFERENCE_QUEUE", default=False, cast=bool)
 38 | MAX_CONCURRENT_PARALLEL_INFERENCE_TASKS = config("MAX_CONCURRENT_PARALLEL_INFERENCE_TASKS", default=10, cast=int)
 39 | USE_RAMDISK = config("USE_RAMDISK", default=False, cast=bool)
 40 | USE_VERBOSE = config("USE_VERBOSE", default=False, cast=bool)
 41 | RAMDISK_PATH = config("RAMDISK_PATH", default="/mnt/ramdisk", cast=str)
 42 | BASE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
 43 | USE_AUTOMATIC_PURGING_OF_EXPIRED_RECORDS = config("USE_AUTOMATIC_PURGING_OF_EXPIRED_RECORDS", default=1, cast=bool)
 44 | 
 45 | if USE_AUTOMATIC_PURGING_OF_EXPIRED_RECORDS:
 46 |     scheduler = AsyncIOScheduler()
 47 |     scheduler.add_job(delete_expired_rows(AsyncSessionLocal), 'interval', hours=1)
 48 |     scheduler.start()
 49 | 
 50 | def is_gpu_available():
 51 |     if not GPU_AVAILABLE:
 52 |         return {
 53 |             "gpu_found": False,
 54 |             "num_gpus": 0,
 55 |             "first_gpu_vram": 0,
 56 |             "total_vram": 0,
 57 |             "error": "nvgpu module not found"
 58 |         }
 59 |     try:
 60 |         gpu_info = nvgpu.gpu_info()
 61 |         num_gpus = len(gpu_info)
 62 |         if num_gpus == 0:
 63 |             return {
 64 |                 "gpu_found": False,
 65 |                 "num_gpus": 0,
 66 |                 "first_gpu_vram": 0,
 67 |                 "total_vram": 0
 68 |             }
 69 |         first_gpu_vram = gpu_info[0]['mem_total']
 70 |         total_vram = sum(gpu['mem_total'] for gpu in gpu_info)
 71 |         return {
 72 |             "gpu_found": True,
 73 |             "num_gpus": num_gpus,
 74 |             "first_gpu_vram": first_gpu_vram,
 75 |             "total_vram": total_vram,
 76 |             "gpu_info": gpu_info
 77 |         }
 78 |     except Exception as e:
 79 |         return {
 80 |             "gpu_found": False,
 81 |             "num_gpus": 0,
 82 |             "first_gpu_vram": 0,
 83 |             "total_vram": 0,
 84 |             "error": str(e)
 85 |         }
 86 |         
 87 | async def initialize_globals():
 88 |     global db_writer, faiss_indexes, associated_texts_by_model_and_pooling_method, redis, lock_manager
 89 |     if not is_redis_running():
 90 |         logger.info("Starting Redis server...")
 91 |         start_redis_server()
 92 |         await asyncio.sleep(1)  # Sleep for 1 second to give Redis time to start
 93 |     redis = await aioredis.create_redis_pool('redis://localhost')
 94 |     lock_manager = Aioredlock([redis])
 95 |     lock_manager.default_lock_timeout = 20000  # Lock timeout in milliseconds (20 seconds)
 96 |     lock_manager.retry_count = 5  # Number of retries
 97 |     lock_manager.retry_delay_min = 100  # Minimum delay between retries in milliseconds
 98 |     lock_manager.retry_delay_max = 1000  # Maximum delay between retries in milliseconds
 99 |     await initialize_db()
100 |     queue = asyncio.Queue()
101 |     db_writer = DatabaseWriter(queue)
102 |     await db_writer.initialize_processing_hashes()
103 |     asyncio.create_task(db_writer.dedicated_db_writer())
104 |     global USE_RAMDISK
105 |     if USE_RAMDISK and not check_that_user_has_required_permissions_to_manage_ramdisks():
106 |         USE_RAMDISK = False
107 |     elif USE_RAMDISK:
108 |         setup_ramdisk()
109 |     list_of_downloaded_model_names, download_status = download_models()
110 |     faiss_indexes, associated_texts_by_model_and_pooling_method = await build_faiss_indexes()
111 | 
112 | 
113 | # other shared variables and methods
114 | db_writer = None
115 | faiss_indexes = None
116 | associated_texts_by_model_and_pooling_method = None
117 | redis = None
118 | lock_manager = None
119 | 
120 | def download_models() -> Tuple[List[str], List[Dict[str, str]]]:
121 |     download_status = []    
122 |     json_path = os.path.join(BASE_DIRECTORY, "model_urls.json")
123 |     if not os.path.exists(json_path):
124 |         initial_model_urls = [
125 |             "https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf",
126 |             "https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q3_K_S.gguf",
127 |             "https://huggingface.co/vonjack/bge-m3-gguf/resolve/main/bge-m3-q8_0.gguf"
128 |         ]
129 |         with open(json_path, "w") as f:
130 |             json.dump(initial_model_urls, f)
131 |     with open(json_path, "r") as f:
132 |         list_of_model_download_urls = json.load(f)
133 |     model_names = [os.path.basename(url) for url in list_of_model_download_urls]
134 |     current_file_path = os.path.abspath(__file__)
135 |     base_dir = os.path.dirname(current_file_path)
136 |     models_dir = os.path.join(base_dir, 'models')
137 |     logger.info("Checking models directory...")
138 |     if USE_RAMDISK:
139 |         ramdisk_models_dir = os.path.join(RAMDISK_PATH, 'models')
140 |         if not os.path.exists(RAMDISK_PATH):
141 |             setup_ramdisk()
142 |         if all(os.path.exists(os.path.join(ramdisk_models_dir, llm_model_name)) for llm_model_name in model_names):
143 |             logger.info("Models found in RAM Disk.")
144 |             for url in list_of_model_download_urls:
145 |                 download_status.append({"url": url, "status": "success", "message": "Model found in RAM Disk."})
146 |             return model_names, download_status
147 |     if not os.path.exists(models_dir):
148 |         os.makedirs(models_dir)
149 |         logger.info(f"Created models directory: {models_dir}")
150 |     else:
151 |         logger.info(f"Models directory exists: {models_dir}")
152 |     lock = FileLock(os.path.join(models_dir, "download.lock"))
153 |     for url, model_name_with_extension in zip(list_of_model_download_urls, model_names):
154 |         status = {"url": url, "status": "success", "message": "File already exists."}
155 |         filename = os.path.join(models_dir, model_name_with_extension)
156 |         try:
157 |             with lock.acquire(timeout=1200): # Wait up to 20 minutes for the file to be downloaded before returning failure
158 |                 if not os.path.exists(filename):
159 |                     logger.info(f"Downloading model {model_name_with_extension} from {url}...")
160 |                     urllib.request.urlretrieve(url, filename)
161 |                     file_size = os.path.getsize(filename) / (1024 * 1024)  # Convert bytes to MB
162 |                     if file_size < 100:
163 |                         os.remove(filename)
164 |                         status["status"] = "failure"
165 |                         status["message"] = "Downloaded file is too small, probably not a valid model file."
166 |                     else:
167 |                         logger.info(f"Downloaded: {filename}")
168 |                 else:
169 |                     logger.info(f"File already exists: {filename}")
170 |         except Timeout:
171 |             logger.warning(f"Could not acquire lock for downloading {model_name_with_extension}")
172 |             status["status"] = "failure"
173 |             status["message"] = "Could not acquire lock for downloading."
174 |         download_status.append(status)
175 |     if USE_RAMDISK:
176 |         copy_models_to_ramdisk(models_dir, ramdisk_models_dir)
177 |     logger.info("Model downloads completed.")
178 |     return model_names, download_status
179 | 
180 | def load_model(llm_model_name: str, raise_http_exception: bool = True):
181 |     global USE_VERBOSE
182 |     model_instance = None
183 |     try:
184 |         models_dir = os.path.join(RAMDISK_PATH, 'models') if USE_RAMDISK else os.path.join(BASE_DIRECTORY, 'models')
185 |         if llm_model_name in embedding_model_cache:
186 |             return embedding_model_cache[llm_model_name]
187 |         matching_files = glob.glob(os.path.join(models_dir, f"{llm_model_name}*"))
188 |         if not matching_files:
189 |             logger.error(f"No model file found matching: {llm_model_name}")
190 |             raise FileNotFoundError
191 |         matching_files.sort(key=os.path.getmtime, reverse=True)
192 |         model_file_path = matching_files[0]
193 |         gpu_info = is_gpu_available()
194 |         if 'llava' in llm_model_name:
195 |             is_llava_multimodal_model = 1
196 |         else:
197 |             is_llava_multimodal_model = 0
198 |         if not is_llava_multimodal_model:
199 |             if gpu_info['gpu_found']:
200 |                 try:
201 |                     model_instance = llama_cpp.Llama(model_path=model_file_path, embedding=True, n_ctx=LLM_CONTEXT_SIZE_IN_TOKENS, verbose=USE_VERBOSE, n_gpu_layers=-1) # Load the model with GPU acceleration
202 |                 except Exception as e:  # noqa: F841
203 |                     model_instance = llama_cpp.Llama(model_path=model_file_path, embedding=True, n_ctx=LLM_CONTEXT_SIZE_IN_TOKENS, verbose=USE_VERBOSE)
204 |             else:
205 |                 model_instance = llama_cpp.Llama(model_path=model_file_path, embedding=True, n_ctx=LLM_CONTEXT_SIZE_IN_TOKENS, verbose=USE_VERBOSE) # Load the model without GPU acceleration        
206 |             embedding_model_cache[llm_model_name] = model_instance
207 |         return model_instance
208 |     except TypeError as e:
209 |         logger.error(f"TypeError occurred while loading the model: {e}")
210 |         raise
211 |     except Exception as e:
212 |         logger.error(f"Exception occurred while loading the model: {e}")
213 |         traceback.print_exc()
214 |         if raise_http_exception:
215 |             raise HTTPException(status_code=404, detail="Model file not found")
216 |         else:
217 |             raise FileNotFoundError(f"No model file found matching: {llm_model_name}")
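
A minimal usage sketch for the two functions above: bge-m3-q8_0 is one of the models
seeded into model_urls.json, and embed() is the llama-cpp-python call available on a
model opened with embedding=True (the exact file present on disk may vary):

    from shared_resources import download_models, load_model

    # Ensure the models listed in model_urls.json are present locally (or in the RAM disk).
    model_names, download_status = download_models()

    # load_model() globs for files starting with the given name, so the .gguf
    # extension can be omitted; loaded models are cached in embedding_model_cache.
    llm = load_model("bge-m3-q8_0")
    vector = llm.embed("The quick brown fox jumps over the lazy dog.")
    print(len(vector))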


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/tests/conftest.py


--------------------------------------------------------------------------------
/tests/swiss_army_llama/test_audio_transcription_functions.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | import json
  3 | import os
  4 | import re
  5 | import shutil
  6 | import tempfile
  7 | from hashlib import sha3_256
  8 | from datetime import datetime
  9 | from fastapi import Request
 10 | from fastapi.datastructures import UploadFile
 11 | from sqlalchemy import create_engine, text as sql_text
 12 | from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
 13 | from sqlalchemy.orm import sessionmaker
 14 | from swiss_army_llama import (normalize_logprobs, remove_pagination_breaks, sophisticated_sentence_splitter, get_transcript_from_db, save_transcript_to_db, execute_with_retry, db_writer,
 15 |                             merge_transcript_segments_into_combined_text, compute_and_store_transcript_embeddings, compute_transcript_with_whisper_from_audio_func, get_or_compute_transcript)
 16 | from embeddings_data_models import AudioTranscript, AudioTranscriptResponse
 17 | 
 18 | 
 19 | DATABASE_URL = "sqlite+aiosqlite:///test_swiss_army_llama.sqlite"
 20 | engine = create_engine(DATABASE_URL.replace("+aiosqlite", ""))  # the sync engine needs a plain sqlite:// URL
 21 | async_engine = create_async_engine(DATABASE_URL)
 22 | 
 23 | TestingSessionLocal = sessionmaker(
 24 |     bind=engine,
 25 |     expire_on_commit=False,
 26 | )
 27 | 
 28 | # Async Session for testing
 29 | AsyncTestingSessionLocal = sessionmaker(
 30 |     bind=async_engine,
 31 |     class_=AsyncSession,
 32 |     expire_on_commit=False,
 33 | )
 34 | 
 35 | @pytest.mark.asyncio
 36 | async def test_get_and_save_transcript():
 37 |     audio_file_hash = "test_audio_file_hash"
 38 |     audio_file_name = "test_audio_file_name"
 39 |     audio_file_size_mb = 1.0
 40 |     transcript_segments = json.dumps({"test": "segment"})
 41 |     info = json.dumps({"test": "info"})
 42 |     ip_address = "127.0.0.1"
 43 |     request_time = datetime.now()
 44 |     response_time = datetime.now()
 45 |     total_time = 1.0
 46 |     combined_transcript_text = "test text"
 47 |     combined_transcript_text_list_of_metadata_dicts = json.dumps({"test": "metadata"})
 48 |     
 49 |     # Save transcript to DB
 50 |     await save_transcript_to_db(
 51 |         audio_file_hash, audio_file_name, audio_file_size_mb, transcript_segments, info,
 52 |         ip_address, request_time, response_time, total_time, combined_transcript_text,
 53 |         combined_transcript_text_list_of_metadata_dicts
 54 |     )
 55 |     await db_writer.dedicated_db_writer()
 56 | 
 57 |     # Raw SQL query to validate data using sql_text
 58 |     async with AsyncTestingSessionLocal() as session:
 59 |         query = sql_text("SELECT audio_file_name FROM audio_transcripts WHERE audio_file_hash=:audio_file_hash")
 60 |         result = await session.execute(query, {"audio_file_hash": audio_file_hash})
 61 |         row = result.fetchone()
 62 |         assert row[0] == audio_file_name
 63 |     
 64 |     # Get transcript from DB using execute_with_retry
 65 |     result = await execute_with_retry(get_transcript_from_db, audio_file_hash)
 66 |         
 67 |     assert isinstance(result, AudioTranscriptResponse)
 68 |     assert result.audio_file_name == audio_file_name
 69 |     assert result.audio_file_size_mb == audio_file_size_mb
 70 |     
 71 |     # Raw SQL query to validate data using sql_text
 72 |     async with AsyncTestingSessionLocal() as session:
 73 |         query = sql_text("SELECT audio_file_name FROM audio_transcripts WHERE audio_file_hash=:audio_file_hash")
 74 |         result = await session.execute(query, {"audio_file_hash": audio_file_hash})
 75 |         row = result.fetchone()
 76 |         assert row[0] == audio_file_name
 77 | 
 78 | @pytest.fixture(scope="module", autouse=True)
 79 | def setup_and_teardown():
 80 |     # Setup: Create tables using the synchronous engine, so this plain
 81 |     # (non-async) fixture needs no running event loop
 82 |     AudioTranscript.metadata.create_all(bind=engine)
 83 |     yield
 84 |     # Teardown: Drop tables
 85 |     AudioTranscript.metadata.drop_all(bind=engine)
 86 | 
 87 | 
 88 | @pytest.mark.asyncio
 89 | async def test_text_related_functions():
 90 |     # Testing normalize_logprobs
 91 |     assert normalize_logprobs(5, 2, 10) == 0.375
 92 | 
 93 |     # Testing remove_pagination_breaks (utilizes 're' module)
 94 |     assert remove_pagination_breaks("This is a test-\nexample.") == "This is a testexample."
 95 |     
 96 |     # Using re for an additional test
 97 |     assert re.match(r'^This is', 'This is a test') is not None  # re.match returns a Match object, not True
 98 | 
 99 |     # Testing sophisticated_sentence_splitter
100 |     assert sophisticated_sentence_splitter("This is a test. And another.") == ["This is a test.", "And another."]
101 | 
102 | 
103 | @pytest.mark.asyncio
104 | async def test_merge_transcript_segments_into_combined_text():
105 |     segments = [{"start": 0, "end": 2, "text": "Hi", "avg_logprob": -0.5},
106 |                 {"start": 2, "end": 5, "text": "there", "avg_logprob": -0.7}]
107 |     combined_text, metadata_dicts, sentences = merge_transcript_segments_into_combined_text(segments)
108 |     assert combined_text == "Hi there "
109 |     assert metadata_dicts[0]['model_confidence_score'] == 1.0
110 | 
111 | 
112 | @pytest.mark.asyncio
113 | async def test_get_or_compute_transcript():
114 |     # Preparing a fake audio file
115 |     audio_content = b"fake_audio_data"
116 |     audio_file = UploadFile("fake_audio.wav", file=tempfile.NamedTemporaryFile(delete=False))
117 |     audio_file.file.write(audio_content)
118 |     audio_file.file.seek(0)
119 |     
120 |     # Hashing the audio content using sha3_256
121 |     audio_hash = sha3_256(audio_content).hexdigest()
122 | 
123 |     # Simulate a request object
124 |     req = Request({"type": "http", "client": ("127.0.0.1", 12345)})  # default receive/send suffice here
125 |             
126 |     # Actual function call
127 |     result = await get_or_compute_transcript(audio_file, True, "test_model", req)
128 | 
129 |     # Validate
130 |     assert isinstance(result, AudioTranscriptResponse)
131 |     assert result.audio_file_name == "fake_audio.wav"
132 | 
133 |     # Compute the hash and validate
134 |     audio_hash = sha3_256(audio_content).hexdigest()
135 |     assert result.audio_file_hash == audio_hash  # Using the hash here for validation
136 | 
137 |     # Compute and store transcript embeddings (Mocking the function for test)
138 |     # Here, you can replace 'dummy_transcript' and 'dummy_model' with actual data if available
139 |     await compute_and_store_transcript_embeddings('dummy_transcript', [], 'dummy_model', '127.0.0.1', 'dummy_text', req)
140 | 
141 |     # Cleanup
142 |     audio_file.file.close()
143 |     shutil.rmtree('generated_transcript_embeddings_zip_files', ignore_errors=True)
144 |     os.remove(audio_file.file.name)
145 |     
146 | 
147 | @pytest.mark.asyncio
148 | async def test_compute_transcript_with_whisper_from_audio_func():
149 |     audio_file_hash = "test_audio_file_hash"
150 |     audio_file_path = "/path/to/audio/file"
151 |     audio_file_name = "test_audio_file_name.wav"
152 |     audio_file_size_mb = 1.0
153 |     ip_address = "127.0.0.1"
154 | 
155 |     # Simulate a request object
156 |     req = Request({"type": "http", "client": ("127.0.0.1", 12345)})  # default receive/send suffice here
157 | 
158 |     # Calling compute_transcript_with_whisper_from_audio_func
159 |     segment_details, info_dict, combined_transcript_text, combined_transcript_text_list_of_metadata_dicts, request_time, response_time, total_time, download_url = await compute_transcript_with_whisper_from_audio_func(
160 |         audio_file_hash, audio_file_path, audio_file_name, audio_file_size_mb, ip_address, req
161 |     )
162 | 
163 |     # Validate (since this is a test, you may need to adjust these assertions based on what compute_transcript_with_whisper_from_audio_func actually returns)
164 |     assert segment_details is not None
165 |     assert info_dict is not None
166 |     assert combined_transcript_text is not None
167 |     assert combined_transcript_text_list_of_metadata_dicts is not None
168 |     assert request_time is not None
169 |     assert response_time is not None
170 |     assert total_time is not None    


--------------------------------------------------------------------------------
/tests/swiss_army_llama/test_build_faiss_indexes.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import json
 3 | from unittest.mock import AsyncMock
 4 | import numpy as np
 5 | from swiss_army_llama import build_faiss_indexes, AsyncSessionLocal
 6 | import faiss
 7 | 
 8 | @pytest.mark.asyncio
 9 | async def test_build_faiss_indexes(monkeypatch):
10 |     # Mocking data returned from the database for embeddings
11 |     mock_embedding_data = [("model1", "text1", json.dumps([1.0, 1.0])), ("model1", "text2", json.dumps([1.0, 1.0]))]
12 |     
13 |     # Mocking SQLAlchemy execute method to return our mock data
14 |     async def mock_execute(*args, **kwargs):
15 |         if "SELECT llm_model_name, text, embedding_json FROM embeddings" in args[0]:
16 |             return AsyncMock(fetchall=AsyncMock(return_value=mock_embedding_data))  # return the mock result itself, not a coroutine
17 | 
18 |     # Mocking the database session
19 |     monkeypatch.setattr(AsyncSessionLocal, "execute", mock_execute)
20 |     
21 |     # Run the function to test
22 |     faiss_indexes, associated_texts_by_model_and_pooling_method = await build_faiss_indexes()
23 |     
24 |     # Verify that FAISS indexes have been built for the mock data
25 |     assert "model1" in faiss_indexes
26 |     
27 |     # Verify that associated texts have been correctly identified
28 |     assert associated_texts_by_model_and_pooling_method["model1"] == ["text1", "text2"]
29 |     
30 |     # Verify that the FAISS index is valid
31 |     embedding_array = np.array([[1.0, 1.0], [1.0, 1.0]]).astype('float32')
32 |     faiss.normalize_L2(embedding_array)
33 |     assert faiss_indexes["model1"].ntotal == len(embedding_array)
34 | 
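
For context on the last two assertions, a self-contained sketch of the FAISS pattern
being exercised; whether build_faiss_indexes uses an inner-product index internally is
an assumption here, but L2-normalizing the vectors (as the test does) is what makes
inner product equal to cosine similarity:

    import faiss
    import numpy as np

    vectors = np.array([[1.0, 1.0], [1.0, 1.0]], dtype="float32")
    faiss.normalize_L2(vectors)                  # in-place L2 normalization
    index = faiss.IndexFlatIP(vectors.shape[1])  # exact inner-product index (assumed)
    index.add(vectors)
    assert index.ntotal == len(vectors)
    scores, ids = index.search(vectors[:1], 2)   # cosine scores after normalization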


--------------------------------------------------------------------------------
/tests/swiss_army_llama/test_database_operations.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import asyncio
 3 | from datetime import datetime
 4 | from sqlalchemy import select
 5 | from swiss_army_llama import DatabaseWriter, execute_with_retry, engine, AsyncSessionLocal
 6 | from embeddings_data_models import Base, TextEmbedding, DocumentEmbedding, Document, AudioTranscript
 7 | from sqlalchemy.exc import OperationalError
 8 | 
 9 | @pytest.fixture(scope='module')
10 | async def setup_db():
11 |     async with engine.begin() as conn:
12 |         await conn.run_sync(Base.metadata.create_all)
13 |     yield
14 |     await engine.dispose()
15 | 
16 | @pytest.fixture
17 | def db_writer():
18 |     queue = asyncio.Queue()
19 |     return DatabaseWriter(queue)
20 | 
21 | @pytest.mark.asyncio
22 | @pytest.mark.usefixtures("setup_db")
23 | async def test_enqueue_text_embedding_write(db_writer):
24 |     async with AsyncSessionLocal() as session:
25 |         text_embedding = TextEmbedding(
26 |             text="text",
27 |             llm_model_name="model",
28 |             embedding_json="{}",
29 |             ip_address="127.0.0.1",
30 |             request_time=datetime.now(),
31 |             response_time=datetime.now(),
32 |             total_time=1.0
33 |         )
34 |         await db_writer.enqueue_write([text_embedding])
35 |         await db_writer.dedicated_db_writer()
36 |         result = await execute_with_retry(session, select(TextEmbedding).where(TextEmbedding.text == "text"), OperationalError)
37 |         assert result.scalar_one().text == "text"
38 | 
39 | @pytest.mark.asyncio
40 | @pytest.mark.usefixtures("setup_db")
41 | async def test_enqueue_document_embedding_write(db_writer):
42 |     async with AsyncSessionLocal() as session:
43 |         doc_embedding = DocumentEmbedding(
44 |             document_hash="doc_hash",
45 |             filename="file",
46 |             mimetype="text",
47 |             document_file_hash="document_file_hash",
48 |             llm_model_name="model",
49 |             file_data=b"data",
50 |             document_embedding_results_json={},
51 |             ip_address="127.0.0.1",
52 |             request_time=datetime.now(),
53 |             response_time=datetime.now(),
54 |             total_time=1.0
55 |         )
56 |         await db_writer.enqueue_write([doc_embedding])
57 |         await db_writer.dedicated_db_writer()
58 |         result = await execute_with_retry(session, select(DocumentEmbedding).where(DocumentEmbedding.filename == "file"), OperationalError)
59 |         assert result.scalar_one().filename == "file"
60 | 
61 | @pytest.mark.asyncio
62 | @pytest.mark.usefixtures("setup_db")
63 | async def test_enqueue_document_write(db_writer):
64 |     async with AsyncSessionLocal() as session:
65 |         document = Document(
66 |             llm_model_name="model",
67 |             document_hash="doc_hash"
68 |         )
69 |         await db_writer.enqueue_write([document])
70 |         await db_writer.dedicated_db_writer()
71 |         result = await execute_with_retry(session, select(Document).where(Document.document_hash == "doc_hash"), OperationalError)
72 |         assert result.scalar_one().document_hash == "doc_hash"
73 | 
74 | @pytest.mark.asyncio
75 | @pytest.mark.usefixtures("setup_db")
76 | async def test_enqueue_audio_transcript_write(db_writer):
77 |     async with AsyncSessionLocal() as session:
78 |         audio_transcript = AudioTranscript(
79 |             audio_file_hash="audio_hash",
80 |             audio_file_name="audio_name",
81 |             audio_file_size_mb=1.0,
82 |             segments_json={},
83 |             combined_transcript_text="text",
84 |             combined_transcript_text_list_of_metadata_dicts={},
85 |             info_json={},
86 |             ip_address="127.0.0.1",
87 |             request_time=datetime.now(),
88 |             response_time=datetime.now(),
89 |             total_time=1.0
90 |         )
91 |         await db_writer.enqueue_write([audio_transcript])
92 |         await db_writer.dedicated_db_writer()
93 |         result = await execute_with_retry(session, select(AudioTranscript).where(AudioTranscript.audio_file_hash == "audio_hash"), OperationalError)
94 |         assert result.scalar_one().audio_file_hash == "audio_hash"
95 | 
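
One caveat on these tests (and on test_get_and_save_transcript above): shared_resources
schedules dedicated_db_writer() with asyncio.create_task, which suggests it is a
long-running consumer loop, so awaiting it directly would never return. Under that
assumption, a sketch of the background-task pattern (the sleep is an illustrative way
to let the queue drain):

    import asyncio

    async def enqueue_and_flush(db_writer, objects):
        # Run the consumer loop in the background, mirroring initialize_globals.
        writer_task = asyncio.create_task(db_writer.dedicated_db_writer())
        await db_writer.enqueue_write(objects)
        await asyncio.sleep(0.2)  # illustrative: give the loop time to drain the queue
        writer_task.cancel()      # stop the loop once the writes are flushed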


--------------------------------------------------------------------------------
/tests/swiss_army_llama/test_ramdisk_management.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import os
 3 | import subprocess
 4 | from swiss_army_llama import check_that_user_has_required_permissions_to_manage_ramdisks, setup_ramdisk, copy_models_to_ramdisk, clear_ramdisk, RAMDISK_PATH
 5 | 
 6 | @pytest.mark.skipif(not os.environ.get('RUN_SUDO_TESTS'), reason="requires admin rights")
 7 | def test_check_user_permission_for_ramdisk():
 8 |     assert check_that_user_has_required_permissions_to_manage_ramdisks() is True
 9 | 
10 | @pytest.mark.skipif(not os.environ.get('RUN_SUDO_TESTS'), reason="requires admin rights")
11 | def test_setup_ramdisk():
12 |     setup_ramdisk()
13 |     cmd_check = f"mount | grep {RAMDISK_PATH}"
14 |     result = subprocess.run(cmd_check, shell=True, stdout=subprocess.PIPE).stdout.decode('utf-8')
15 |     assert RAMDISK_PATH in result
16 | 
17 | @pytest.mark.skipif(not os.environ.get('RUN_SUDO_TESTS'), reason="requires admin rights")
18 | def test_copy_models_to_ramdisk():
19 |     models_directory = "./test_models/"
20 |     ramdisk_directory = RAMDISK_PATH
21 |     os.makedirs(models_directory, exist_ok=True)
22 |     with open(f"{models_directory}/dummy_model.bin", "wb") as f:
23 |         f.write(b"Dummy data")
24 |     copy_models_to_ramdisk(models_directory, ramdisk_directory)
25 |     assert os.path.exists(os.path.join(ramdisk_directory, "dummy_model.bin")) is True
26 | 
27 | @pytest.mark.skipif(not os.environ.get('RUN_SUDO_TESTS'), reason="requires admin rights")
28 | def test_clear_ramdisk():
29 |     clear_ramdisk()
30 |     cmd_check = f"mount | grep {RAMDISK_PATH}"
31 |     result = subprocess.run(cmd_check, shell=True, stdout=subprocess.PIPE).stdout.decode('utf-8')
32 |     assert RAMDISK_PATH not in result
33 | 


--------------------------------------------------------------------------------
/tests/swiss_army_llama/test_service_functions.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/tests/swiss_army_llama/test_service_functions.py


--------------------------------------------------------------------------------
/tests/swiss_army_llama/test_views.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Dicklesworthstone/swiss_army_llama/7bd155410ff2cdf71b4ddf4ccd5a626a600690b3/tests/swiss_army_llama/test_views.py


--------------------------------------------------------------------------------
/tests/test_log_viewer_functions.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pytest
 3 | from datetime import datetime, timedelta
 4 | from pytz import timezone
 5 | from log_viewer_functions import safe_highlight_func, highlight_rules_func, show_logs_func, show_logs_incremental_func
 6 | 
 7 | @pytest.fixture(scope="session", autouse=True)
 8 | def prepare_log_file():
 9 |     # Create a more realistic sample log file for testing
10 |     log_file_path = 'swiss_army_llama.log'
11 |     now = datetime.now(timezone('UTC'))
12 |     five_minutes_ago = now - timedelta(minutes=5)
13 |     sample_logs = f"""{five_minutes_ago.strftime('%Y-%m-%d %H:%M:%S,%f')[:-3]} - This is a success.
14 | {now.strftime('%Y-%m-%d %H:%M:%S,%f')[:-3]} - This is an error.
15 | """
16 |     with open(log_file_path, 'w') as f:
17 |         f.write(sample_logs)
18 |     yield
19 |     os.remove(log_file_path)
20 | 
21 | def test_safe_highlight_func():
22 |     assert safe_highlight_func("Hello world", r"world", "WORLD") == "Hello WORLD"
23 |     assert safe_highlight_func("Hello world", r"[", "WORLD") == "Hello world"
24 | 
25 | def test_highlight_rules_func():
26 |     assert highlight_rules_func("This is a success.") == 'This is a success.'
27 | 
28 | def test_show_logs_func(prepare_log_file):
29 |     logs = show_logs_func(5)
30 |     assert "success" in logs
31 |     assert "error" in logs
32 | 
33 | def test_show_logs_incremental_func(prepare_log_file):
34 |     result = show_logs_incremental_func(5, 0)
35 |     assert "success" in result["logs"]
36 |     assert "error" in result["logs"]
37 | 


--------------------------------------------------------------------------------
/tests/test_sentiment_score_generation.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import asyncio
 3 | from sentiment_score_generation import (
 4 |     generate_all_prompts,
 5 |     generate_llm_sentiment_score_prompt,
 6 |     combine_populated_prompts_with_source_text,
 7 |     validate_llm_generated_sentiment_response,
 8 |     run_llm_in_process,
 9 |     parallel_attempt,
10 |     combine_llm_generated_sentiment_responses,
11 |     analyze_focus_area_sentiments
12 | )
13 | 
14 | class TestSentimentScoreGeneration(unittest.TestCase):
15 | 
16 |     def setUp(self):
17 |         self.focus_key = "financial_investor_focus"
18 |         self.scoring_scale_explanation = "Test explanation"
19 |         self.source_text_positive = "The company has shown impressive growth this year."
20 |         self.source_text_negative = "The company's performance has been disappointing."
21 |         self.model_name = "Test model"
22 | 
23 |     def test_generate_all_prompts(self):
24 |         populated_prompts = generate_all_prompts(self.focus_key, self.scoring_scale_explanation)
25 |         self.assertIsInstance(populated_prompts, dict)
26 |         self.assertTrue('Optimistic' in populated_prompts)
27 | 
28 |     def test_generate_llm_sentiment_score_prompt(self):
29 |         prompt = generate_llm_sentiment_score_prompt('Optimistic', 'Positive outlook', 'Investors', self.scoring_scale_explanation)
30 |         self.assertIsInstance(prompt, str)
31 | 
32 |     def test_combine_populated_prompts_with_source_text(self):
33 |         combined = combine_populated_prompts_with_source_text('Test prompt', 'Test source')
34 |         self.assertIsInstance(combined, str)
35 | 
36 |     def test_validate_llm_generated_sentiment_response(self):
37 |         sentiment_score, justification = validate_llm_generated_sentiment_response('5 | Test justification', -10, 10)
38 |         self.assertIsInstance(sentiment_score, float)
39 |         self.assertIsInstance(justification, str)
40 | 
41 |     def test_run_llm_in_process(self):
42 |         result = run_llm_in_process('Test prompt', 'Test model')
43 |         self.assertIsInstance(result, dict)
44 | 
45 |     def test_parallel_attempt(self):
46 |         loop = asyncio.get_event_loop()
47 |         result = loop.run_until_complete(parallel_attempt('Test prompt', 'Test model'))
48 |         self.assertIsInstance(result, str)
49 | 
50 |     def test_combine_llm_generated_sentiment_responses(self):
51 |         outputs = ['5 | Justification', '6 | Justification']
52 |         mean_score, ci, iqr, iqr_pct, justifications = combine_llm_generated_sentiment_responses(outputs, -10, 10)
53 |         self.assertIsInstance(mean_score, float)
54 |         self.assertIsInstance(ci, list)
55 |         self.assertIsInstance(iqr, list)
56 |         self.assertIsInstance(iqr_pct, float)
57 |         self.assertIsInstance(justifications, list)
58 | 
59 |     def test_analyze_focus_area_sentiments(self):
60 |         loop = asyncio.get_event_loop()
61 |         result = loop.run_until_complete(analyze_focus_area_sentiments(self.focus_key, self.scoring_scale_explanation, self.source_text_positive, self.model_name))
62 |         self.assertIsInstance(result, dict)
63 | 
64 |     def test_positive_sentiment(self):
65 |         loop = asyncio.get_event_loop()
66 |         result = loop.run_until_complete(analyze_focus_area_sentiments(self.focus_key, self.scoring_scale_explanation, self.source_text_positive, self.model_name))
67 |         self.assertIsInstance(result, dict)
68 |         self.assertGreater(result['individual_sentiment_report_dict']['Optimistic']['sentiment_scores_dict']['mean_sentiment_score'], 50)
69 | 
70 |     def test_negative_sentiment(self):
71 |         loop = asyncio.get_event_loop()
72 |         result = loop.run_until_complete(analyze_focus_area_sentiments(self.focus_key, self.scoring_scale_explanation, self.source_text_negative, self.model_name))
73 |         self.assertIsInstance(result, dict)
74 |         self.assertLess(result['individual_sentiment_report_dict']['Optimistic']['sentiment_scores_dict']['mean_sentiment_score'], -50)
75 |         
76 | if __name__ == '__main__':
77 |     unittest.main()
78 | 
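
Since asyncio.get_event_loop() outside a running loop is deprecated on Python 3.10+,
the coroutine-driving tests above could equally be written with
unittest.IsolatedAsyncioTestCase; a sketch for one case:

    import unittest
    from sentiment_score_generation import parallel_attempt

    class TestSentimentScoreGenerationAsync(unittest.IsolatedAsyncioTestCase):
        async def test_parallel_attempt(self):
            # The runner manages the event loop, so the coroutine is awaited directly.
            result = await parallel_attempt('Test prompt', 'Test model')
            self.assertIsInstance(result, str)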


--------------------------------------------------------------------------------
/uvicorn_config.py:
--------------------------------------------------------------------------------
 1 | from decouple import config
 2 | SWISS_ARMY_LLAMA_SERVER_LISTEN_PORT = config("SWISS_ARMY_LLAMA_SERVER_LISTEN_PORT", default=8089, cast=int)
 3 | UVICORN_NUMBER_OF_WORKERS = config("UVICORN_NUMBER_OF_WORKERS", default=3, cast=int)
 4 | 
 5 | option = {
 6 |     "host": "0.0.0.0",
 7 |     "port": SWISS_ARMY_LLAMA_SERVER_LISTEN_PORT,
 8 |     "workers": UVICORN_NUMBER_OF_WORKERS
 9 | }
10 | 
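
The option dict maps directly onto uvicorn.run keyword arguments; a minimal launch
sketch, where the "swiss_army_llama:app" import string is an assumption based on the
repository layout:

    import uvicorn
    from uvicorn_config import option

    if __name__ == "__main__":
        # host, port, and workers are unpacked from uvicorn_config.option; an import
        # string (rather than an app object) is required when workers > 1.
        uvicorn.run("swiss_army_llama:app", **option)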


--------------------------------------------------------------------------------