├── .devcontainer └── devcontainer.json ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── config.yml ├── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md ├── scripts │ └── build_sdist_and_wheel.sh └── workflows │ ├── asv_benchmark_pr.yml │ ├── build_documentation.yml │ ├── publish_documentation.yml │ ├── release_docker.yml │ ├── release_pypi.yaml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pydocstyle ├── .readthedocs.yaml ├── .vscode └── settings.json ├── Dockerfile ├── LICENSE ├── README.md ├── benchmarks ├── __init__.py ├── asv.conf.json ├── bench_cfg_guide.py ├── bench_json_schema.py ├── bench_processors.py ├── bench_regex_guide.py └── common.py ├── docs ├── api │ ├── guide.md │ ├── index.md │ ├── json_schema.md │ ├── models.md │ ├── parsing.md │ ├── regex.md │ ├── samplers.md │ └── templates.md ├── assets │ └── images │ │ ├── dottxt-dark.svg │ │ ├── dottxt-light.svg │ │ ├── dottxt.png │ │ ├── logits_processing_diagram.svg │ │ ├── logo-dark-mode.png │ │ ├── logo-dark-mode.svg │ │ ├── logo-light-mode.png │ │ ├── logo-light-mode.svg │ │ ├── logo-simple.png │ │ ├── logo-square.png │ │ ├── logo-square.svg │ │ └── normal_computing.jpg ├── blog │ ├── .authors.yml │ ├── assets │ │ └── 4000_stars.png │ ├── index.md │ └── posts │ │ └── roadmap-2024.md ├── community │ ├── belonging.png │ ├── contribute.md │ ├── examples.md │ ├── feedback.md │ ├── index.md │ └── versioning.md ├── cookbook │ ├── atomic_caption.md │ ├── chain_of_density.md │ ├── chain_of_thought.md │ ├── classification.md │ ├── dating_profiles.md │ ├── deploy-using-bentoml.md │ ├── deploy-using-cerebrium.md │ ├── deploy-using-modal.md │ ├── earnings-reports.md │ ├── extract_event_details.md │ ├── extract_event_details.py │ ├── extraction.md │ ├── images │ │ ├── chain_of_density.png │ │ ├── coding_structure_diagram.png │ │ ├── knowledge-graph-extraction.png │ │ ├── nvidia-income.png │ │ ├── simtom.png │ │ └── trader-joes-receipt.jpg │ ├── index.md │ ├── knowledge_graph_extraction.md │ ├── models_playing_chess.md │ ├── qa-with-citations.md │ ├── react_agent.md │ ├── read-pdfs.md │ ├── receipt-digitization.md │ ├── simtom.md │ └── structured_generation_workflow.md ├── index.md ├── installation.md ├── licence.md ├── logos │ ├── amazon.png │ ├── apple.png │ ├── best_buy.png │ ├── canoe.png │ ├── cisco.png │ ├── dassault_systems.png │ ├── databricks.png │ ├── datadog.png │ ├── dbt_labs.png │ ├── gladia.jpg │ ├── harvard.png │ ├── hf.png │ ├── johns_hopkins.png │ ├── meta.png │ ├── mit.png │ ├── mount_sinai.png │ ├── nvidia.png │ ├── nyu.png │ ├── safran.png │ ├── salesforce.png │ ├── shopify.png │ ├── smithsonian.png │ ├── tinder.png │ └── upenn.png ├── overrides │ ├── home.html │ └── main.html ├── quickstart.md ├── reference │ ├── chat_templating.md │ ├── generation │ │ ├── cfg.md │ │ ├── choices.md │ │ ├── creating_grammars.md │ │ ├── custom_fsm_ops.md │ │ ├── format.md │ │ ├── generation.md │ │ ├── json.md │ │ ├── regex.md │ │ ├── structured_generation_explanation.md │ │ └── types.md │ ├── index.md │ ├── models │ │ ├── exllamav2.md │ │ ├── llamacpp.md │ │ ├── mlxlm.md │ │ ├── models.md │ │ ├── openai.md │ │ ├── tgi.md │ │ ├── transformers.md │ │ ├── transformers_vision.md │ │ └── vllm.md │ ├── prompting.md │ ├── regex_dsl.md │ ├── samplers.md │ ├── serve │ │ ├── lmstudio.md │ │ └── vllm.md │ └── text.md ├── stylesheets │ └── extra.css └── welcome.md ├── environment.yml ├── examples ├── babyagi.py ├── beam-cloud │ ├── README.md │ └── app.py ├── bentoml │ ├── .bentoignore │ ├── 
bentofile.yaml │ ├── import_model.py │ ├── requirements.txt │ └── service.py ├── cerebrium │ ├── cerebrium.toml │ └── main.py ├── cfg.py ├── dating_profile.py ├── llamacpp_example.py ├── llamacpp_processor.py ├── math_generate_code.py ├── meta_prompting.py ├── modal_example.py ├── parsing.py ├── pick_odd_one_out.py ├── react.py ├── sampling.ipynb ├── self_consistency.py ├── simulation_based_inference.ipynb ├── transformers_integration.py └── vllm_integration.py ├── flake.lock ├── flake.nix ├── mkdocs.yml ├── outlines ├── __init__.py ├── base.py ├── caching.py ├── fsm │ ├── __init__.py │ ├── guide.py │ ├── json_schema.py │ ├── parsing.py │ └── types.py ├── function.py ├── generate │ ├── __init__.py │ ├── api.py │ ├── cfg.py │ ├── choice.py │ ├── format.py │ ├── fsm.py │ ├── generator.py │ ├── json.py │ ├── regex.py │ └── text.py ├── grammars.py ├── grammars │ ├── arithmetic.lark │ ├── common.lark │ └── json.lark ├── models │ ├── __init__.py │ ├── exllamav2.py │ ├── llamacpp.py │ ├── mlxlm.py │ ├── openai.py │ ├── tokenizer.py │ ├── transformers.py │ ├── transformers_vision.py │ └── vllm.py ├── processors │ ├── __init__.py │ ├── base_logits_processor.py │ └── structured.py ├── py.typed ├── samplers.py ├── serve │ ├── __init__.py │ └── serve.py ├── templates.py └── types │ ├── __init__.py │ ├── airports.py │ ├── countries.py │ ├── dsl.py │ └── locale │ ├── __init__.py │ └── us.py ├── pyproject.toml ├── requirements-doc.txt ├── setup.cfg ├── shell.nix ├── tests ├── __init__.py ├── cfg_samples │ ├── arithmetic │ │ ├── lots_of_ops.arithmetic.test │ │ └── simple_math.arithmetic.test │ └── json │ │ ├── outlines.generate.samplers.mypy.json.test │ │ ├── simple_fruit.json.test │ │ └── simple_fruit_no_indent.json.test ├── conftest.py ├── fsm │ ├── partial_python.lark │ ├── test_cfg_guide.py │ ├── test_guide.py │ ├── test_json_schema.py │ ├── test_parsing.py │ └── test_types.py ├── generate │ ├── __init__.py │ ├── conftest.py │ ├── test_api.py │ ├── test_generate.py │ ├── test_generator.py │ ├── test_integration_exllamav2.py │ ├── test_integration_llamacpp.py │ ├── test_integration_transformers.py │ ├── test_integration_transformers_vision.py │ └── test_integration_vllm.py ├── models │ ├── test_mlxlm.py │ ├── test_openai.py │ ├── test_tokenizer.py │ └── test_transformers.py ├── processors │ └── test_base_processor.py ├── test_base.py ├── test_cache.py ├── test_function.py ├── test_grammars.py ├── test_samplers.py ├── test_templates.py └── types │ ├── test_custom_types.py │ ├── test_dsl.py │ └── test_to_regex.py └── uv.lock /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dottxt-ai", 3 | "image": "mcr.microsoft.com/devcontainers/python:3.12", 4 | "runArgs": [ 5 | "--device=nvidia.com/gpu=all" 6 | ], 7 | "hostRequirements": { 8 | "gpu": "optional" 9 | }, 10 | "features": { 11 | "ghcr.io/devcontainers/features/conda:1": {}, 12 | "ghcr.io/devcontainers/features/nvidia-cuda:1": { 13 | "installCudnn": true, 14 | "installToolkit": true, 15 | "cudaVersion": "12.4" 16 | }, 17 | "ghcr.io/devcontainers/features/rust:1": {} 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*] 7 | indent_style = space 8 | indent_size = 4 9 | end_of_line = lf 10 | charset = utf-8 11 | 
trim_trailing_whitespace = true 12 | insert_final_newline = true 13 | 14 | [*.yaml] 15 | indent_size = 2 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | # Issue template inspired by NumPy's excellent template: 2 | # https://github.com/numpy/numpy/edit/main/.github/ISSUE_TEMPLATE/bug-report.yml 3 | name: 🐞 Bug report 4 | description: Create a bug report to help us reproduce and fix it. 5 | title: "" 6 | labels: ["bug"] 7 | 8 | body: 9 | - type: markdown 10 | attributes: 11 | value: >- 12 | Thank you for taking the time to file a bug report. First, carefully read 13 | the following before everything else: 14 | 15 | - Does your issue only arise in a library that uses Outlines? If so, 16 | submit your issue to this library's issue tracker. 17 | - Did you check the issue tracker for open and closed issues that may be 18 | related to your bug? 19 | 20 | - type: textarea 21 | attributes: 22 | label: "Describe the issue as clearly as possible:" 23 | validations: 24 | required: true 25 | 26 | - type: textarea 27 | attributes: 28 | label: "Steps/code to reproduce the bug:" 29 | description: > 30 | A short code example that reproduces the problem/missing feature. It 31 | should be self-contained, i.e., can be copy-pasted into the Python 32 | interpreter or run as-is via `python myproblem.py`. 33 | placeholder: | 34 | import outlines 35 | 36 | << your code here >> 37 | render: python 38 | validations: 39 | required: true 40 | 41 | - type: textarea 42 | attributes: 43 | label: "Expected result:" 44 | description: > 45 | Please describe what you expect the above example to output. 46 | placeholder: | 47 | << the expected result here >> 48 | render: shell 49 | validations: 50 | required: true 51 | 52 | - type: textarea 53 | attributes: 54 | label: "Error message:" 55 | description: > 56 | Please include the full error message, if any. 57 | placeholder: | 58 | << Full traceback starting from `Traceback: ...` >> 59 | render: shell 60 | 61 | - type: textarea 62 | attributes: 63 | label: "Outlines/Python version information:" 64 | description: | 65 | Please run the following code and paste the output here. 66 | python -c "from outlines import _version; print(_version.__version__)"; 67 | python -c "import sys; print('Python', sys.version)"; 68 | pip freeze; 69 | value: | 70 | Version information 71 |
72 | ``` 73 | (command output here) 74 | ``` 75 |
76 | validations: 77 | required: true 78 | 79 | - type: textarea 80 | attributes: 81 | label: "Context for the issue:" 82 | description: | 83 | Please explain how this issue affects your work or why it should be prioritized. 84 | placeholder: | 85 | << your explanation here >> 86 | validations: 87 | required: false 88 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | contact_links: 2 | - name: 🤔 Questions & Help 3 | url: https://github.com/dottxt-ai/outlines/discussions/new 4 | about: "If you have a question about how to use Outlines, please start a discussion." 5 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # 🚧 Thank you for opening a PR! 2 | 3 | A few important guidelines and requirements before we can merge your PR: 4 | 5 | - [ ] We should be able to understand what the PR does from its title only; 6 | - [ ] There is a high-level description of the changes; 7 | - [ ] *If I add a new feature*, there is an [issue][issues] discussing it already; 8 | - [ ] There are links to *all* the relevant issues, discussions and PRs; 9 | - [ ] The branch is rebased on the latest `main` commit; 10 | - [ ] **Commit messages** follow these [guidelines][git-guidelines]; 11 | - [ ] One commit per logical change; 12 | - [ ] The code respects the current **naming conventions**; 13 | - [ ] Docstrings follow the [numpy style guide][docstring-guidelines]; 14 | - [ ] `pre-commit` is installed and configured on your machine, and you ran it before opening the PR; 15 | - [ ] There are tests covering the changes; 16 | - [ ] The documentation is up-to-date; 17 | 18 | Consider opening a **Draft PR** if your work is still in progress but you would 19 | like some feedback from other contributors. 20 | 21 | [issues]: https://github.com/dottxt-ai/outlines/issues 22 | [git-guidelines]: https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html 23 | [docstring-guidelines]: https://numpydoc.readthedocs.io/en/latest/format.html 24 | -------------------------------------------------------------------------------- /.github/scripts/build_sdist_and_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Build sdist and wheel 4 | python -m pip install -U pip 5 | python -m pip install build 6 | python -m build 7 | 8 | # Check sdist install and imports 9 | mkdir -p test-sdist 10 | cd test-sdist 11 | python -m venv venv-sdist 12 | venv-sdist/bin/python -m pip install ../dist/outlines-*.tar.gz 13 | venv-sdist/bin/python -c "import outlines" 14 | cd .. 15 | 16 | # Check wheel install and imports 17 | mkdir -p test-wheel 18 | cd test-wheel 19 | python -m venv venv-wheel 20 | venv-wheel/bin/python -m pip install ../dist/outlines-*.whl 21 | venv-wheel/bin/python -c "import outlines" 22 | cd .. 
23 | -------------------------------------------------------------------------------- /.github/workflows/asv_benchmark_pr.yml: -------------------------------------------------------------------------------- 1 | name: Benchmark PR 2 | 3 | on: 4 | push: 5 | pull_request: 6 | types: [synchronize, labeled] 7 | workflow_dispatch: 8 | env: 9 | PYTHON_VERSION: "3.10" 10 | WORKING_DIR: ${{ github.workspace }}/benchmarks 11 | BENCHMARKS_OUTPUT: ${{ github.workspace }}/benchmarks_output 12 | 13 | permissions: 14 | contents: read 15 | 16 | # Cancels all previous workflow runs for pull requests that have not completed. 17 | concurrency: 18 | # The concurrency group contains the workflow name and the branch name for pull requests 19 | # or the commit hash for any other events. 20 | group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }} 21 | cancel-in-progress: true 22 | 23 | jobs: 24 | benchmark-pr: 25 | runs-on: ubuntu-latest 26 | if: ${{ contains(github.event.pull_request.labels.*.name, 'run-benchmarks') || github.ref == 'refs/heads/main' }} 27 | 28 | defaults: 29 | run: 30 | working-directory: ${{ env.WORKING_DIR }} 31 | 32 | steps: 33 | 34 | - name: Checkout repository 35 | uses: actions/checkout@v3 36 | with: 37 | fetch-depth: 0 38 | 39 | - name: Set up Python 40 | uses: actions/setup-python@v4 41 | with: 42 | python-version: ${{ env.PYTHON_VERSION }} 43 | 44 | - name: Install dependencies 45 | run: | 46 | python -m pip install --upgrade pip 47 | pip install asv virtualenv lf-asv-formatter 48 | 49 | - name: Create ASV machine config file 50 | run: asv machine --machine gh-runner --yes 51 | 52 | - name: Run Benchmarks - `PR HEAD` vs `main` 53 | run: | 54 | # prepare main branch for comparison 55 | git remote add upstream https://github.com/${{ github.repository }}.git 56 | git fetch upstream main 57 | 58 | # Run benchmarks, allow errors, they will be caught in the next step 59 | asv continuous upstream/main HEAD \ 60 | --no-stats --interleave-rounds -a repeat=3 || true 61 | 62 | - name: BENCHMARK RESULTS 63 | run: | 64 | asv compare --factor=1.1 --no-stats --split upstream/main HEAD | tee ${{ env.BENCHMARKS_OUTPUT }} 65 | if grep -q "Benchmarks that have got worse" "${{ env.BENCHMARKS_OUTPUT }}"; then 66 | echo "Performance degradation detected!" 
67 | exit 1 68 | fi 69 | -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build the documentation 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | workflow_dispatch: 7 | 8 | jobs: 9 | build: 10 | name: Build 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions/setup-python@v4 15 | with: 16 | python-version: "3.10" 17 | 18 | - name: Build the documentation 19 | env: 20 | GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }} 21 | run: | 22 | pip install -r requirements-doc.txt 23 | mkdocs build 24 | -------------------------------------------------------------------------------- /.github/workflows/publish_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Publish the documentation 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | release: 9 | types: 10 | - created 11 | 12 | permissions: 13 | contents: write 14 | 15 | jobs: 16 | deploy: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 22 | - uses: actions/setup-python@v4 23 | with: 24 | python-version: 3.x 25 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 26 | - uses: actions/cache@v3 27 | with: 28 | key: mkdocs-material-${{ env.cache_id }} 29 | path: .cache 30 | restore-keys: | 31 | mkdocs-material- 32 | - run: pip install -r requirements-doc.txt 33 | - run: mkdocs build 34 | 35 | - name: Set up Git 36 | run: | 37 | git config user.name ${{ github.actor }} 38 | git config user.email ${{ github.actor }}@users.noreply.github.com 39 | 40 | - name: Publish Tag as latest 41 | env: 42 | GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }} 43 | if: github.event_name == 'release' 44 | run: | 45 | mike deploy --push --update-aliases ${{ github.ref_name }} latest 46 | mike set-default --push latest 47 | 48 | - name: Publish main as unstable 49 | env: 50 | GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }} 51 | if: github.event_name == 'push' 52 | run: | 53 | mike deploy --push --update-aliases ${{ github.ref_name }} unstable 54 | -------------------------------------------------------------------------------- /.github/workflows/release_docker.yml: -------------------------------------------------------------------------------- 1 | name: Release Docker 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | workflow_dispatch: 8 | inputs: 9 | release_tag: 10 | description: 'Release Tag (for manual dispatch)' 11 | required: false 12 | default: 'latest' 13 | jobs: 14 | release-job: 15 | name: Build and publish on Docker Hub 16 | runs-on: ubuntu-latest 17 | environment: release 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v4 21 | - name: Log in to Docker Hub 22 | uses: docker/login-action@v3 23 | with: 24 | username: ${{ secrets.DOCKERHUB_USERNAME }} 25 | password: ${{ secrets.DOCKERHUB_TOKEN }} 26 | - name: Build and push Docker image 27 | uses: docker/build-push-action@v5 28 | with: 29 | push: true 30 | tags: | 31 | outlinesdev/outlines:latest 32 | outlinesdev/outlines:${{ github.event.release.tag_name || github.event.inputs.release_tag }} 33 | build-args: | 34 | BUILDKIT_CONTEXT_KEEP_GIT_DIR=true 35 | - name: Clean docker cache 36 | run: docker system prune --all --force 37 | -------------------------------------------------------------------------------- 
/.github/workflows/release_pypi.yaml: -------------------------------------------------------------------------------- 1 | name: Release PyPi 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | jobs: 8 | release-job: 9 | name: Build and publish on PyPi 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v2 14 | - name: Set up Python 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: "3.10" 18 | - name: Build SDist and Wheel 19 | run: ./.github/scripts/build_sdist_and_wheel.sh 20 | - name: Check that the package version matches the Release name 21 | run: | 22 | grep -Rq "^Version: ${GITHUB_REF:10}$" outlines.egg-info/PKG-INFO 23 | - name: Publish to PyPi 24 | uses: pypa/gh-action-pypi-publish@v1.4.2 25 | with: 26 | user: __token__ 27 | password: ${{ secrets.PYPI_TOKEN }} 28 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | push: 7 | branches: [main] 8 | 9 | jobs: 10 | style: 11 | name: Check the code style 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - uses: actions/setup-python@v4 16 | with: 17 | python-version: "3.10" 18 | - uses: pre-commit/action@v3.0.0 19 | 20 | tests: 21 | name: Run the tests 22 | runs-on: ubuntu-latest 23 | strategy: 24 | matrix: 25 | python-version: ["3.10"] 26 | steps: 27 | - uses: actions/checkout@v3 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v4 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | - name: Set up test environment 33 | run: | 34 | python -m pip install --upgrade pip 35 | pip install uv 36 | uv venv 37 | uv pip install -e .[test] 38 | - name: Create matrix id 39 | id: matrix-id 40 | env: 41 | MATRIX_CONTEXT: ${{ toJson(matrix) }} 42 | run: | 43 | echo $MATRIX_CONTEXT 44 | export MATRIX_ID=`echo $MATRIX_CONTEXT | md5sum | cut -c 1-32` 45 | echo $MATRIX_ID 46 | echo "::set-output name=id::$MATRIX_ID" 47 | - name: Run tests 48 | run: | 49 | uv run pytest -x --cov=outlines 50 | env: 51 | COVERAGE_FILE: .coverage.${{ steps.matrix-id.outputs.id }} 52 | - name: Upload coverage data 53 | uses: actions/upload-artifact@v4 54 | with: 55 | name: coverage-data 56 | path: .coverage.* 57 | if-no-files-found: ignore 58 | include-hidden-files: true 59 | # TODO FIXME: This is only using the last run 60 | overwrite: true 61 | 62 | coverage: 63 | name: Combine & check coverage. 64 | needs: tests 65 | runs-on: ubuntu-latest 66 | 67 | steps: 68 | - uses: actions/checkout@v3 69 | with: 70 | fetch-depth: 0 71 | 72 | - uses: actions/setup-python@v4 73 | with: 74 | cache: pip 75 | python-version: "3.11" 76 | 77 | - name: Set up environment 78 | run: | 79 | pip install --upgrade "coverage[toml]>=5.1" diff-cover 80 | 81 | - uses: actions/download-artifact@v4 82 | with: 83 | name: coverage-data 84 | 85 | - name: Fetch main for coverage diff 86 | run: | 87 | git fetch --no-tags --prune origin main 88 | 89 | - name: Combine coverage & fail if it's <100%. 90 | run: | 91 | python -m coverage combine 92 | python -m coverage html --skip-covered --skip-empty 93 | python -m coverage xml 94 | diff-cover coverage.xml --markdown-report=coverage.md --fail-under=100 || (cat coverage.md >> $GITHUB_STEP_SUMMARY && exit 1) 95 | 96 | - name: Upload HTML report if check failed. 
97 | uses: actions/upload-artifact@v4 98 | with: 99 | name: html-report 100 | path: htmlcov 101 | # TODO FIXME: This is only using the last run 102 | overwrite: true 103 | if: ${{ failure() }} 104 | 105 | build-wheel: 106 | name: Build Wheel and Test SDist 107 | runs-on: ubuntu-latest 108 | steps: 109 | - uses: actions/checkout@v3 110 | - name: Build SDist and Wheel 111 | run: ./.github/scripts/build_sdist_and_wheel.sh 112 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .benchmarks 3 | .cache 4 | .coverage 5 | .direnv 6 | .env 7 | .idea 8 | .pytest_cache 9 | .python-version 10 | .venv 11 | *_version.py 12 | *.egg-info 13 | *.gguf 14 | benchmarks/results 15 | build 16 | docs/build 17 | logs 18 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v5.0.0 4 | hooks: 5 | - id: check-merge-conflict 6 | - id: debug-statements 7 | - id: end-of-file-fixer 8 | - id: trailing-whitespace 9 | - repo: https://github.com/pre-commit/mirrors-mypy 10 | rev: v1.14.1 11 | hooks: 12 | - id: mypy 13 | args: [--allow-redefinition] 14 | exclude: ^examples/ 15 | additional_dependencies: [types-tqdm, types-Pillow] 16 | - repo: https://github.com/astral-sh/ruff-pre-commit 17 | rev: v0.9.1 18 | hooks: 19 | - id: ruff 20 | args: ["--config=pyproject.toml"] 21 | -------------------------------------------------------------------------------- /.pydocstyle: -------------------------------------------------------------------------------- 1 | [pydocstyle] 2 | convention = numpy 3 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | python: 4 | version: "3.8" 5 | install: 6 | - method: pip 7 | path: . 8 | extra_requirements: 9 | - rtd 10 | - requirements: requirements-doc.txt 11 | 12 | sphinx: 13 | builder: html 14 | configuration: docs/source/conf.py 15 | fail_on_warning: true 16 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | "tests" 4 | ], 5 | "python.testing.unittestEnabled": false, 6 | "python.testing.pytestEnabled": true 7 | } 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ### Build stage 2 | FROM python:3.10 AS builder 3 | 4 | WORKDIR /outlines 5 | 6 | RUN pip install --upgrade pip 7 | 8 | # Copy necessary build components 9 | COPY pyproject.toml . 
10 | COPY outlines ./outlines 11 | 12 | # Install outlines and outlines[serve] 13 | # .git required by setuptools-scm 14 | RUN --mount=source=.git,target=.git,type=bind \ 15 | pip install --no-cache-dir .[serve] 16 | 17 | ### Runtime stage 18 | FROM python:3.10 19 | WORKDIR /outlines 20 | COPY --from=builder /outlines /outlines 21 | 22 | # https://dottxt-ai.github.io/outlines/reference/vllm/ 23 | ENTRYPOINT ["python3", "-m", "outlines.serve.serve"] 24 | -------------------------------------------------------------------------------- /benchmarks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/benchmarks/__init__.py -------------------------------------------------------------------------------- /benchmarks/asv.conf.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "project": "Outlines", 4 | "project_url": "https://dottxt-ai.github.io/outlines/", 5 | "repo": "..", 6 | "branches": [ 7 | "HEAD" 8 | ], 9 | "build_command": [ 10 | "python -mpip install .[test]", 11 | "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}", 12 | ], 13 | "environment_type": "virtualenv", 14 | "show_commit_url": "https://github.com/dottxt-ai/outlines/commit/", 15 | "benchmark_dir": ".", 16 | "env_dir": "env", 17 | "results_dir": "results", 18 | "html_dir": "html", 19 | "build_cache_size": 8 20 | } 21 | -------------------------------------------------------------------------------- /benchmarks/bench_cfg_guide.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from transformers import AutoTokenizer 4 | 5 | import outlines.grammars 6 | from outlines.caching import cache_disabled 7 | from outlines.fsm.guide import CFGGuide 8 | from outlines.models.transformers import TransformerTokenizer 9 | 10 | random.seed(42) 11 | 12 | 13 | def get_tiny_tokenizer(): 14 | """1000 tokens in vocabulary""" 15 | return TransformerTokenizer( 16 | AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") 17 | ) 18 | 19 | 20 | benched_grammars = { 21 | "json": outlines.grammars.json, 22 | "arithmetic": outlines.grammars.arithmetic, 23 | } 24 | 25 | 26 | class CFGGuideBenchmark: 27 | params = benched_grammars.keys() 28 | 29 | def setup(self, grammar_name): 30 | self.tokenizer = get_tiny_tokenizer() 31 | self.prebuilt_cfg_guide = CFGGuide( 32 | benched_grammars[grammar_name], self.tokenizer 33 | ) 34 | 35 | @staticmethod 36 | def _run_random_cfg(guide, rejection_sampling=True): 37 | state = guide.initial_state 38 | token_ids = list(guide.tokenizer.vocabulary.values()) 39 | for i in range(40): 40 | # simulate ordering of logits top prob to lowest prob 41 | random.shuffle(token_ids) 42 | # simulate sampling and state update 43 | if rejection_sampling: 44 | next_token_id = next(guide.iter_valid_token_ids(state, token_ids)) 45 | state = guide.get_next_state(state, next_token_id) 46 | else: 47 | next_token_id = random.choice(guide.get_next_instruction(state).tokens) 48 | state = guide.get_next_state(state, next_token_id) 49 | 50 | @cache_disabled() 51 | def time_cfg_guide_setup(self, grammar_name): 52 | CFGGuide(benched_grammars[grammar_name], self.tokenizer) 53 | 54 | @cache_disabled() 55 | def time_cfg_guide_run_rejection_sampling(self, grammar): 56 | self._run_random_cfg(self.prebuilt_cfg_guide, rejection_sampling=True) 57 | 
58 | @cache_disabled() 59 | def time_cfg_guide_run(self, grammar): 60 | self._run_random_cfg(self.prebuilt_cfg_guide, rejection_sampling=False) 61 | 62 | @cache_disabled() 63 | def peakmem_cfg_guide_run(self, grammar): 64 | self._run_random_cfg(self.prebuilt_cfg_guide) 65 | -------------------------------------------------------------------------------- /benchmarks/bench_json_schema.py: -------------------------------------------------------------------------------- 1 | from outlines_core.fsm.json_schema import build_regex_from_schema 2 | 3 | from outlines.caching import cache_disabled 4 | from outlines.fsm.guide import RegexGuide 5 | 6 | from .common import setup_tokenizer # noqa: E402 7 | 8 | simple_schema = """{ 9 | "$defs": { 10 | "Armor": { 11 | "enum": ["leather", "chainmail", "plate"], 12 | "title": "Armor", 13 | "type": "string" 14 | } 15 | }, 16 | "properties": { 17 | "name": {"maxLength": 10, "title": "Name", "type": "string"}, 18 | "age": {"title": "Age", "type": "integer"}, 19 | "armor": {"$ref": "#/$defs/Armor"}, 20 | "strength": {"title": "Strength", "type": "integer"}\ 21 | }, 22 | "required": ["name", "age", "armor", "strength"], 23 | "title": "Character", 24 | "type": "object" 25 | }""" 26 | 27 | 28 | complex_schema = """{ 29 | "$schema": "http://json-schema.org/draft-04/schema#", 30 | "title": "Schema for a recording", 31 | "type": "object", 32 | "definitions": { 33 | "artist": { 34 | "type": "object", 35 | "properties": { 36 | "id": {"type": "number"}, 37 | "name": {"type": "string"}, 38 | "functions": { 39 | "type": "array", 40 | "items": {"type": "string"} 41 | } 42 | }, 43 | "required": ["id", "name", "functions"] 44 | } 45 | }, 46 | "properties": { 47 | "id": {"type": "number"}, 48 | "work": { 49 | "type": "object", 50 | "properties": { 51 | "id": {"type": "number"}, 52 | "name": {"type": "string"}, 53 | "composer": {"$ref": "#/definitions/artist"} 54 | } 55 | }, 56 | "recording_artists": { 57 | "type": "array", 58 | "items": {"$ref": "#/definitions/artist"} 59 | } 60 | }, 61 | "required": ["id", "work", "recording_artists"] 62 | }""" 63 | 64 | schemas = dict(simple_schema=simple_schema, complex_schema=complex_schema) 65 | 66 | 67 | class JsonSchemaBenchmark: 68 | params = schemas.keys() 69 | 70 | def setup(self, schema_name): 71 | self.tokenizer = setup_tokenizer() 72 | self.schema = schemas[schema_name] 73 | 74 | @cache_disabled() 75 | def time_json_schema_to_fsm(self, schema_name): 76 | regex = build_regex_from_schema(self.schema) 77 | RegexGuide.from_regex(regex, self.tokenizer) 78 | -------------------------------------------------------------------------------- /benchmarks/bench_processors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import outlines.models as models 5 | from outlines.processors import OutlinesLogitsProcessor, RegexLogitsProcessor 6 | 7 | try: 8 | import mlx.core as mx 9 | except ImportError: 10 | pass 11 | 12 | try: 13 | import jax 14 | import jax.numpy as jnp 15 | except ImportError: 16 | pass 17 | 18 | 19 | def is_mlx_lm_allowed(): 20 | try: 21 | import mlx.core as mx 22 | except ImportError: 23 | return False 24 | return mx.metal.is_available() 25 | 26 | 27 | def is_jax_allowed(): 28 | try: 29 | import jax # noqa: F401 30 | except ImportError: 31 | return False 32 | return True 33 | 34 | 35 | def get_mock_processor_inputs(array_library, num_tokens=30000): 36 | """ 37 | logits: (4, 30,000 ) dtype=float 38 | input_ids shape: (4, 2048) dtype=int 39 | """ 40 
| if array_library.startswith("torch"): 41 | device = array_library.split("_")[1] if "_" in array_library else "cpu" 42 | 43 | logits = torch.rand((4, num_tokens), dtype=torch.float, device=device) 44 | input_ids = torch.randint( 45 | low=0, high=num_tokens, size=(4, 2048), dtype=torch.int, device=device 46 | ) 47 | elif array_library == "numpy": 48 | logits = np.random.rand(4, num_tokens).astype(np.float32) 49 | input_ids = np.random.randint(low=0, high=num_tokens, size=(4, 2048)) 50 | elif array_library == "mlx": 51 | logits = mx.random.uniform( 52 | low=-1e9, high=1e9, shape=(4, num_tokens), dtype=mx.float32 53 | ) 54 | input_ids = mx.random.randint( 55 | low=0, high=num_tokens, shape=(4, 2048), dtype=mx.int32 56 | ) 57 | elif array_library == "jax": 58 | logits = jnp.random.uniform( 59 | key=jax.random.PRNGKey(0), shape=(4, num_tokens), dtype=jnp.float32 60 | ) 61 | input_ids = jnp.random.randint( 62 | key=jax.random.PRNGKey(0), low=0, high=num_tokens, shape=(4, 2048) 63 | ) 64 | else: 65 | raise ValueError 66 | 67 | return logits, input_ids 68 | 69 | 70 | class HalvingLogitsProcessor(OutlinesLogitsProcessor): 71 | """Simply halve the passed logits""" 72 | 73 | def process_logits(self, input_ids, logits): 74 | return logits / 2 75 | 76 | 77 | class LogitsProcessorPassthroughBenchmark: 78 | """ 79 | Benchmark the time it takes to convert between array frameworks 80 | This should be on the order of microseconds 81 | """ 82 | 83 | params = ["torch", "numpy"] 84 | if is_mlx_lm_allowed(): 85 | params += ["mlx"] 86 | if torch.cuda.is_available(): 87 | params += ["torch_cuda"] 88 | if torch.mps.is_available(): 89 | params += ["torch_mps"] 90 | if is_jax_allowed(): 91 | params += ["jax"] 92 | 93 | def setup(self, array_library): 94 | self.logits_processor = HalvingLogitsProcessor() 95 | 96 | self.logits, self.input_ids = get_mock_processor_inputs(array_library) 97 | 98 | def time_passthrough(self, *params): 99 | self.logits_processor(self.input_ids, self.logits) 100 | 101 | 102 | class LogitsProcessorStructuredBenchmark: 103 | """ 104 | Benchmark structured generation mask application for single decoder pass 105 | """ 106 | 107 | array_libraries = ["torch", "numpy"] 108 | if is_mlx_lm_allowed(): 109 | array_libraries += ["mlx"] 110 | if torch.cuda.is_available(): 111 | array_libraries += ["torch_cuda"] 112 | if torch.mps.is_available(): 113 | array_libraries += ["torch_mps"] 114 | 115 | # accept very many or very few tokens, respectively 116 | patterns = [r"[^Z]*", "Z*"] 117 | 118 | params = [array_libraries, patterns] 119 | param_names = ["array_library, pattern"] 120 | 121 | def setup(self, array_library, pattern): 122 | tokenizer = models.transformers("facebook/opt-125m", device="cpu").tokenizer 123 | 124 | self.logits_processor = RegexLogitsProcessor(pattern, tokenizer) 125 | 126 | self.logits, self.input_ids = get_mock_processor_inputs( 127 | array_library, len(tokenizer.vocabulary) 128 | ) 129 | 130 | def time_structured_generation(self, array_library, pattern): 131 | self.logits_processor(self.input_ids, self.logits) 132 | -------------------------------------------------------------------------------- /benchmarks/bench_regex_guide.py: -------------------------------------------------------------------------------- 1 | from outlines.caching import cache_disabled 2 | from outlines.fsm.guide import RegexGuide 3 | 4 | from .common import setup_tokenizer 5 | 6 | regex_samples = { 7 | "email": 
r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?", 8 | "complex_phone": "\\+?\\d{1,4}?[-.\\s]?\\(?\\d{1,3}?\\)?[-.\\s]?\\d{1,4}[-.\\s]?\\d{1,4}[-.\\s]?\\d{1,9}", 9 | "simple_phone": "\\+?[1-9][0-9]{7,14}", 10 | "date": r"([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])(\.|-|/)([1-9]|0[1-9]|1[0-2])(\.|-|/)([0-9][0-9]|19[0-9][0-9]|20[0-9][0-9])|([0-9][0-9]|19[0-9][0-9]|20[0-9][0-9])(\.|-|/)([1-9]|0[1-9]|1[0-2])(\.|-|/)([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])", 11 | "time": r"(0?[1-9]|1[0-2]):[0-5]\d\s?(am|pm)?", 12 | "ip": r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)", 13 | "url": r"(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?", 14 | "ssn": r"\d{3}-\d{2}-\d{4}", 15 | "complex_span_constrained_relation_extraction": "(['\"\\ ,]?((?:of|resulting|case|which|cultures|a|core|extreme|selflessness|spiritual|various|However|both|vary|in|other|secular|the|religious|among|moral|and|It|object|worldviews|altruism|traditional|material|aspect|or|life|beings|virtue|is|however|opposite|concern|an|practice|it|for|s|quality|religions|In|Altruism|animals|happiness|many|become|principle|human|selfishness|may|synonym)['\"\\ ,]?)+['\"\\ ,]?\\s\\|\\s([^|\\(\\)\n]{1,})\\s\\|\\s['\"\\ ,]?((?:of|resulting|case|which|cultures|a|core|extreme|selflessness|spiritual|various|However|both|vary|in|other|secular|the|religious|among|moral|and|It|object|worldviews|altruism|traditional|material|aspect|or|life|beings|virtue|is|however|opposite|concern|an|practice|it|for|s|quality|religions|In|Altruism|animals|happiness|many|become|principle|human|selfishness|may|synonym)['\"\\ ,]?)+['\"\\ ,]?(\\s\\|\\s\\(([^|\\(\\)\n]{1,})\\s\\|\\s([^|\\(\\)\n]{1,})\\))*\\n)*", 16 | } 17 | 18 | 19 | class RegexGuideBenchmark: 20 | params = regex_samples.keys() 21 | 22 | def setup(self, pattern_name): 23 | self.tokenizer = setup_tokenizer() 24 | self.pattern = regex_samples[pattern_name] 25 | 26 | @cache_disabled() 27 | def time_regex_to_guide(self, pattern_name): 28 | RegexGuide.from_regex(self.pattern, self.tokenizer) 29 | 30 | 31 | class MemoryRegexGuideBenchmark: 32 | params = ["simple_phone", "complex_span_constrained_relation_extraction"] 33 | 34 | def setup(self, pattern_name): 35 | self.tokenizer = setup_tokenizer() 36 | self.pattern = regex_samples[pattern_name] 37 | 38 | @cache_disabled() 39 | def peakmem_regex_to_guide(self, pattern_name): 40 | RegexGuide.from_regex(self.pattern, self.tokenizer) 41 | -------------------------------------------------------------------------------- /benchmarks/common.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer 2 | 3 | from outlines.models.transformers import TransformerTokenizer 4 | 5 | 6 | def setup_tokenizer(): 7 | tokenizer = AutoTokenizer.from_pretrained("gpt2") 8 | return TransformerTokenizer(tokenizer) 9 | -------------------------------------------------------------------------------- /docs/api/guide.md: -------------------------------------------------------------------------------- 1 | ::: outlines.fsm.guide 2 | -------------------------------------------------------------------------------- /docs/api/index.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | -------------------------------------------------------------------------------- /docs/api/json_schema.md: 
-------------------------------------------------------------------------------- 1 | ::: outlines.fsm.json_schema 2 | -------------------------------------------------------------------------------- /docs/api/models.md: -------------------------------------------------------------------------------- 1 | ::: outlines.models 2 | -------------------------------------------------------------------------------- /docs/api/parsing.md: -------------------------------------------------------------------------------- 1 | ::: outlines.fsm.parsing 2 | -------------------------------------------------------------------------------- /docs/api/regex.md: -------------------------------------------------------------------------------- 1 | ::: outlines.generate.regex 2 | -------------------------------------------------------------------------------- /docs/api/samplers.md: -------------------------------------------------------------------------------- 1 | ::: outlines.samplers 2 | -------------------------------------------------------------------------------- /docs/api/templates.md: -------------------------------------------------------------------------------- 1 | ::: outlines.templates 2 | -------------------------------------------------------------------------------- /docs/assets/images/dottxt-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/assets/images/dottxt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/dottxt.png -------------------------------------------------------------------------------- /docs/assets/images/logo-dark-mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/logo-dark-mode.png -------------------------------------------------------------------------------- /docs/assets/images/logo-light-mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/logo-light-mode.png -------------------------------------------------------------------------------- /docs/assets/images/logo-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/logo-simple.png -------------------------------------------------------------------------------- /docs/assets/images/logo-square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/logo-square.png -------------------------------------------------------------------------------- /docs/assets/images/logo-square.svg: -------------------------------------------------------------------------------- 1 | 2 | 18 | 36 | 39 | 43 | 47 | 51 | 55 | 59 | 63 | 67 | 71 | 75 | 79 | 83 | 87 | 91 | 95 | 99 | 103 | 107 | 111 | 115 | 119 | 120 | 122 | 124 | 131 | 132 | 133 | 134 | 
-------------------------------------------------------------------------------- /docs/assets/images/normal_computing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/normal_computing.jpg -------------------------------------------------------------------------------- /docs/blog/.authors.yml: -------------------------------------------------------------------------------- 1 | authors: 2 | remilouf: 3 | name: Remi Louf 4 | description: author 5 | avatar: https://avatars.githubusercontent.com/u/3885044?v=4 6 | -------------------------------------------------------------------------------- /docs/blog/assets/4000_stars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/blog/assets/4000_stars.png -------------------------------------------------------------------------------- /docs/blog/index.md: -------------------------------------------------------------------------------- 1 | # Blog 2 | -------------------------------------------------------------------------------- /docs/community/belonging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/community/belonging.png -------------------------------------------------------------------------------- /docs/community/examples.md: -------------------------------------------------------------------------------- 1 | # Community projects and articles 2 | 3 | Publishing examples and articles about Outlines is a meaningful way to contribute to the community. Here is a list of projects we are aware of. Drop us a line if we forgot yours! 4 | 5 | [MMSG](https://github.com/leloykun/mmsg) is a Python library for generating interleaved text and image content in a structured format you can directly pass to downstream APIs. 6 | 7 | [Multimodal Structured Generation: CVPR's 2nd MMFM Challenge Technical Report](https://arxiv.org/abs/2406.11403) shows that Structured Generation can outperform finetuning, and maybe even multimodality, in document-image understanding tasks as part of CVPR's 2nd MMFM Challenge. 8 | 9 | [Chess LLM Arena](https://huggingface.co/spaces/mlabonne/chessllm) is a HuggingFace Space where you can make LLMs compete in a chess match. 10 | 11 | [LLM Data Gen](https://huggingface.co/spaces/lhoestq/LLM_DataGen) is a HuggingFace Space that generates synthetic dataset files in JSONLines format. 12 | 13 | [Fast, High-Fidelity LLM Decoding with Regex Constraints](https://vivien000.github.io/blog/journal/llm-decoding-with-regex-constraints.html) presents an efficient alternative to Outlines's structured generation. 14 | 15 | [gigax](https://github.com/GigaxGames/gigax) is an open-source library that allows you to create real-time LLM-powered NPCs for video games. 16 | 17 | [Improving Prompt Consistency with Structured Generations](https://huggingface.co/blog/evaluation-structured-outputs) shows how structured generation can improve consistency of evaluation runs by reducing sensitivity to changes in prompt format. 18 | 19 | [AskNews](https://asknews.app) is a news curation service processing 300k news articles per day in a structured way, with Outlines.
20 | -------------------------------------------------------------------------------- /docs/community/index.md: -------------------------------------------------------------------------------- 1 | # Community 2 | 3 | ![Belonging](belonging.png) 4 | 5 | Outlines exists for a community of users who believe software doesn't need to be complicated, who share the same passion for Large Language Models but don't want to compromise on robustness. Together, we are bringing these powerful models back to the world of software. 6 | 7 | ## Connect on Discord 8 | 9 | The Outlines community lives on our Discord server. There you can ask questions, share ideas or just chat with people like you. Don't be a stranger and [join us][discord]. 10 | 11 | 12 | [discord]: https://discord.gg/UppQmhEpe8 13 | -------------------------------------------------------------------------------- /docs/community/versioning.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Versioning Guide 3 | --- 4 | 5 | # Versioning Guide 6 | 7 | 8 | The Outlines project follows a structured versioning scheme designed to provide clarity and minimize risk for downstream dependents. 9 | 10 | Each part of the version number (`major.minor.patch`) conveys information about the nature and impact of the changes included in the release. 11 | 12 | - **Major Releases** include compatibility-breaking changes to core interfaces, such as `LogitsProcessor`s and `Guides`. 13 | - **Minor Releases** introduce changes of substance to internal or unexposed functionality. These changes are well tested and intended to maintain compatibility with existing use of core interfaces. 14 | - **Patch Releases** address bug fixes and incorporate low-risk changes to improve stability and performance. 15 | 16 | ## Releases 17 | 18 | Releases, along with release notes, can be found on the [Outlines Releases GitHub Page](https://github.com/dottxt-ai/outlines/releases). 19 | 20 | ## Version Pinning Recommendations 21 | 22 | Here are our recommendations for managing dependencies on the Outlines package: 23 | 24 | **Small, Risk-Tolerant Projects:** Pin to a specific major version. 25 | 26 | **Large, Conservative Projects:** Pin to a specific minor version. 27 | -------------------------------------------------------------------------------- /docs/cookbook/classification.md: -------------------------------------------------------------------------------- 1 | # Classification 2 | 3 | Classification is a classic problem in NLP and finds many applications: spam detection, sentiment analysis, triaging of incoming requests, etc. We will use the example of a company that wants to sort support requests between those that require immediate attention (`URGENT`) and those that can wait a little (`STANDARD`). You could easily extend the example by adding new labels. 4 | 5 | 6 | This tutorial shows how one can implement multi-label classification using Outlines. We will use two functionalities of the library: `generate.choice` and `generate.json`. 7 | 8 | As always, we start with initializing the model.
Since we are GPU poor, we will be using a quantized version of Mistral-7B-v0.1: 9 | 10 | ```python 11 | import outlines 12 | 13 | model = outlines.models.transformers("TheBloke/Mistral-7B-OpenOrca-AWQ", device="cuda") 14 | ``` 15 | 16 | We will use the following prompt template: 17 | 18 | ```python 19 | from outlines import Template 20 | 21 | 22 | customer_support = Template.from_string( 23 | """You are an experienced customer success manager. 24 | 25 | Given a request from a client, you need to determine when the 26 | request is urgent using the label "URGENT" or when it can wait 27 | a little with the label "STANDARD". 28 | 29 | # Examples 30 | 31 | Request: "How are you?" 32 | Label: STANDARD 33 | 34 | Request: "I need this fixed immediately!" 35 | Label: URGENT 36 | 37 | # TASK 38 | 39 | Request: {{ request }} 40 | Label: """ 41 | ) 42 | ``` 43 | 44 | ## Choosing between multiple choices 45 | 46 | Outlines provides a shortcut to do multi-label classification, using the `outlines.generate.choice` function to initialize a generator. Outlines uses multinomial sampling by default; here we will use the greedy sampler to get the label with the highest probability: 47 | 48 | ```python 49 | from outlines.samplers import greedy 50 | 51 | generator = outlines.generate.choice(model, ["URGENT", "STANDARD"], sampler=greedy()) 52 | ``` 53 | Outlines supports batched requests, so we will pass two requests to the model: 54 | 55 | ```python 56 | requests = [ 57 | "My hair is on fire! Please help me!!!", 58 | "Just wanted to say hi" 59 | ] 60 | 61 | prompts = [customer_support(request) for request in requests] 62 | ``` 63 | 64 | We can now ask the model to classify the requests: 65 | 66 | ```python 67 | labels = generator(prompts) 68 | print(labels) 69 | # ['URGENT', 'STANDARD'] 70 | ``` 71 | 72 | Now, you might be in a hurry and don't want to wait until the model finishes completion. After all, you only need to see the first letter of the response to know whether the request is urgent or standard. You can instead stream the response: 73 | 74 | ```python 75 | tokens = generator.stream(prompts) 76 | labels = ["URGENT" if "U" in token else "STANDARD" for token in next(tokens)] 77 | print(labels) 78 | # ['URGENT', 'STANDARD'] 79 | ``` 80 | 81 | ## Using JSON-structured generation 82 | 83 | Another (convoluted) way to do multi-label classification is to use JSON-structured generation in Outlines. We first need to define a Pydantic schema that contains the labels: 84 | 85 | ```python 86 | from enum import Enum 87 | from pydantic import BaseModel 88 | 89 | 90 | class Label(str, Enum): 91 | urgent = "URGENT" 92 | standard = "STANDARD" 93 | 94 | 95 | class Classification(BaseModel): 96 | label: Label 97 | ``` 98 | 99 | and we can use `generate.json` by passing the Pydantic model we just defined, and call the generator: 100 | 101 | ```python 102 | generator = outlines.generate.json(model, Classification, sampler=greedy()) 103 | labels = generator(prompts) 104 | print(labels) 105 | # [Classification(label=<Label.urgent: 'URGENT'>), Classification(label=<Label.standard: 'STANDARD'>)] 106 | ``` 107 | -------------------------------------------------------------------------------- /docs/cookbook/deploy-using-cerebrium.md: -------------------------------------------------------------------------------- 1 | # Run Outlines using Cerebrium 2 | 3 | [Cerebrium](https://www.cerebrium.ai/) is a serverless AI infrastructure platform that makes it easier for companies to build and deploy AI-based applications.
They offer serverless GPUs with low cold-start times and over 12 varieties of GPU chips that auto-scale, and you only pay for the compute you use. 4 | 5 | In this guide we will show you how you can use Cerebrium to run programs written with Outlines on GPUs in the cloud. 6 | 7 | # Setup Cerebrium 8 | 9 | First, we install Cerebrium and log in to get authenticated. 10 | 11 | ```bash 12 | pip install cerebrium 13 | cerebrium login 14 | ``` 15 | 16 | Then let us create our first project: 17 | 18 | ```bash 19 | cerebrium init outlines-project 20 | ``` 21 | 22 | ## Setup Environment and Hardware 23 | 24 | You set up your environment and hardware in the `cerebrium.toml` file that was created by the `init` command above. 25 | 26 | ```toml 27 | [cerebrium.deployment] 28 | docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04" 29 | 30 | [cerebrium.hardware] 31 | cpu = 2 32 | memory = 14.0 33 | gpu = "AMPERE A10" 34 | gpu_count = 1 35 | provider = "aws" 36 | region = "us-east-1" 37 | 38 | [cerebrium.dependencies.pip] 39 | outlines = "==0.0.37" 40 | transformers = "==4.38.2" 41 | datasets = "==2.18.0" 42 | accelerate = "==0.27.2" 43 | ``` 44 | 45 | ## Setup inference 46 | 47 | Running code in Cerebrium is like writing normal Python with no special syntax. In a `main.py` file, specify the following: 48 | 49 | ```python 50 | import outlines 51 | 52 | 53 | model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct") 54 | 55 | schema = """{ 56 | "title": "Character", 57 | "type": "object", 58 | "properties": { 59 | "name": { 60 | "title": "Name", 61 | "maxLength": 10, 62 | "type": "string" 63 | }, 64 | "age": { 65 | "title": "Age", 66 | "type": "integer" 67 | }, 68 | "armor": {"$ref": "#/definitions/Armor"}, 69 | "weapon": {"$ref": "#/definitions/Weapon"}, 70 | "strength": { 71 | "title": "Strength", 72 | "type": "integer" 73 | } 74 | }, 75 | "required": ["name", "age", "armor", "weapon", "strength"], 76 | "definitions": { 77 | "Armor": { 78 | "title": "Armor", 79 | "description": "An enumeration.", 80 | "enum": ["leather", "chainmail", "plate"], 81 | "type": "string" 82 | }, 83 | "Weapon": { 84 | "title": "Weapon", 85 | "description": "An enumeration.", 86 | "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], 87 | "type": "string" 88 | } 89 | } 90 | }""" 91 | 92 | generator = outlines.generate.json(model, schema) 93 | ``` 94 | 95 | On first deploy, it will download the model and store it on disk; for subsequent calls it will load the model from disk. 96 | 97 | Every function in Cerebrium is callable through an API endpoint. Code at the top-most layer (i.e., not in a function) is instantiated only when the container is spun up the first time, so subsequent calls will simply run the code defined in the function you call. 98 | 99 | To deploy an API that creates a new character when called with a prompt, you can add the following code to `main.py`: 100 | 101 | ```python 102 | def generate( 103 | prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", 104 | ): 105 | 106 | character = generator( 107 | f"[INST]Give me a character description. Describe {prompt}.[/INST]" 108 | ) 109 | 110 | return character 111 | ``` 112 | 113 | 114 | ## Run on the cloud 115 | 116 | ```bash 117 | cerebrium deploy 118 | ``` 119 | 120 | You will see your application deploy, install pip packages, and download the model. Once completed, it will output a cURL request you can use to call your endpoint.
Just remember to end 121 | the URL with the function you would like to call - in this case /generate. You should see your response returned! 122 | -------------------------------------------------------------------------------- /docs/cookbook/extract_event_details.md: -------------------------------------------------------------------------------- 1 | This recipe demonstrates how to use the `outlines` library to extract structured event details from a text message. 2 | We will extract the title, location, and start date and time from messages like the following: 3 | 4 | ```plaintext 5 | Hello Kitty, my grandmother will be here, I think it's better to postpone 6 | our appointment to review math lessons to next Friday at 2pm at the same 7 | place, 3 avenue des tanneurs, one hour will be enough see you 😘 8 | ``` 9 | 10 | Let's see how to extract the event details from the message with the MLX 11 | library dedicated to Apple Silicon processors (M series). 12 | 13 | ```python 14 | --8<-- "docs/cookbook/extract_event_details.py" 15 | ``` 16 | 17 | The output will be: 18 | 19 | ```plaintext 20 | Today: Saturday 16 November 2024 and it's 10:55 21 | ``` 22 | 23 | and the extracted event information will be: 24 | 25 | ```json 26 | { 27 | "title":"Math Review", 28 | "location":"3 avenue des tanneurs", 29 | "start":"2024-11-22T14:00:00Z" 30 | } 31 | ``` 32 | 33 | 34 | To find out more about this use case, we recommend the [ICS Generator](https://github.com/jrudoler/ics-generator), a project developed by [Joseph Rudoler](https://x.com/JRudoler) 35 | -------------------------------------------------------------------------------- /docs/cookbook/extract_event_details.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | from outlines import generate, models 6 | 7 | # Load the model 8 | model = models.mlxlm("mlx-community/Hermes-3-Llama-3.1-8B-8bit") 9 | 10 | 11 | # Define the event schema using Pydantic 12 | class Event(BaseModel): 13 | title: str = Field(description="title of the event") 14 | location: str 15 | start: datetime = Field( 16 | default=None, description="date of the event if available in iso format" 17 | ) 18 | 19 | 20 | # Get the current date and time 21 | now = datetime.now().strftime("%A %d %B %Y and it's %H:%M") 22 | 23 | # Define the prompt 24 | prompt = f""" 25 | Today's date and time are {now} 26 | Given a user message, extract information of the event like date and time in iso format, location and title. 27 | If the given date is relative, think step by step to find the right date.
 28 | Here is the message:
 29 | """
 30 | 
 31 | # Sample message
 32 | message = """Hello Kitty, my grandmother will be here, I think it's better to postpone our
 33 | appointment to review math lessons to next Friday at 2pm at the same place, 3 avenue des tanneurs, I think that one hour will be enough
 34 | see you 😘 """
 35 | 
 36 | # Create the generator
 37 | generator = generate.json(model, Event)
 38 | 
 39 | # Extract the event information
 40 | event = generator(prompt + message)
 41 | 
 42 | # Print the current date and time
 43 | print(f"Today: {now}")
 44 | 
 45 | # Print the extracted event information in JSON format
 46 | print(event.json())
 47 | 
-------------------------------------------------------------------------------- /docs/cookbook/extraction.md: --------------------------------------------------------------------------------
 1 | # Named entity extraction
 2 | 
 3 | Named Entity Extraction is a fundamental problem in NLP. It involves identifying and categorizing named entities within a document: people, organizations, dates, places, etc. It is usually the first step in a more complex NLP workflow. Here we will use the example of a pizza restaurant that receives orders via its website and needs to identify the number and types of pizzas that are being ordered.
 4 | 
 5 | Getting LLMs to output the extracted entities in a structured format can be challenging. In this tutorial we will see how we can use Outlines' JSON-structured generation to extract entities from a document and return them in a valid JSON data structure 100% of the time.
 6 | 
 7 | As always, we start by initializing the model. We will be using a quantized version of Mistral-7B-v0.1 (we're GPU poor):
 8 | 
 9 | ```python
 10 | import outlines
 11 | 
 12 | model = outlines.models.transformers("TheBloke/Mistral-7B-OpenOrca-AWQ", device="cuda")
 13 | ```
 14 | 
 15 | And we will be using the following prompt template:
 16 | 
 17 | ```python
 18 | from outlines import Template
 19 | 
 20 | 
 21 | take_order = Template.from_string(
 22 |     """You are the owner of a pizza parlor. Customers \
 23 | send you orders from which you need to extract:
 24 | 
 25 | 1. The pizza that is ordered
 26 | 2. The number of pizzas
 27 | 
 28 | # EXAMPLE
 29 | 
 30 | ORDER: I would like one Margherita pizza
 31 | RESULT: {"pizza": "Margherita", "number": 1}
 32 | 
 33 | # OUTPUT INSTRUCTIONS
 34 | 
 35 | Answer in valid JSON. Here are the different objects relevant for the output:
 36 | 
 37 | Order:
 38 |     pizza (str): name of the pizza
 39 |     number (int): number of pizzas
 40 | 
 41 | Return a valid JSON of type "Order"
 42 | 
 43 | # OUTPUT
 44 | 
 45 | ORDER: {{ order }}
 46 | RESULT: """
 47 | )
 48 | ```
 49 | 
 50 | We now define our data model using Pydantic:
 51 | 
 52 | ```python
 53 | from enum import Enum
 54 | from pydantic import BaseModel
 55 | 
 56 | class Pizza(str, Enum):
 57 |     margherita = "Margherita"
 58 |     pepperoni = "Pepperoni"
 59 |     calzone = "Calzone"
 60 | 
 61 | class Order(BaseModel):
 62 |     pizza: Pizza
 63 |     number: int
 64 | ```
 65 | 
 66 | We can now define our generator and call it on several incoming orders:
 67 | 
 68 | ```python
 69 | orders = [
 70 |     "Hi! I would like to order two pepperonni pizzas and would like them in 30mins.",
 71 |     "Is it possible to get 12 margheritas?"
72 | ] 73 | prompts = [take_order(order) for order in orders] 74 | 75 | generator = outlines.generate.json(model, Order) 76 | 77 | results = generator(prompts) 78 | print(results) 79 | # [Order(pizza=, number=2), 80 | # Order(pizza=, number=12)] 81 | ``` 82 | 83 | There are several ways you could improve this example: 84 | 85 | - Clients may order several types of pizzas. 86 | - Clients may order drinks as well. 87 | - If the pizza place has a delivery service we need to extract the client's address and phone number 88 | - Clients may specify the time for which they want the pizza. We could then check against a queuing system and reply to them with the estimated delivery time. 89 | 90 | How would you change the Pydantic model to account for these use cases? 91 | -------------------------------------------------------------------------------- /docs/cookbook/images/chain_of_density.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/chain_of_density.png -------------------------------------------------------------------------------- /docs/cookbook/images/coding_structure_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/coding_structure_diagram.png -------------------------------------------------------------------------------- /docs/cookbook/images/knowledge-graph-extraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/knowledge-graph-extraction.png -------------------------------------------------------------------------------- /docs/cookbook/images/nvidia-income.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/nvidia-income.png -------------------------------------------------------------------------------- /docs/cookbook/images/simtom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/simtom.png -------------------------------------------------------------------------------- /docs/cookbook/images/trader-joes-receipt.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/trader-joes-receipt.jpg -------------------------------------------------------------------------------- /docs/cookbook/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This part of the documentation provides a few cookbooks that you can browse to get acquainted with the library and get some inspiration about what you could do with structured generation. Remember that you can easily change the model that is being used! 4 | 5 | - [Classification](classification.md): Classify customer requests. 6 | - [Named Entity Extraction](extraction.md): Extract information from pizza orders. 
 7 | - [Dating Profile](dating_profiles.md): Build dating profiles from descriptions using prompt templating and JSON-structured generation.
 8 | - [Chain Of Density](chain_of_density.md): Summarize documents using chain of density prompting and JSON-structured generation.
 9 | - [Playing Chess](models_playing_chess.md): Make Phi-3 Mini play chess against itself using regex-structured generation.
 10 | - [SimToM](simtom.md): Improve LLMs' Theory of Mind capabilities with perspective-taking prompting and JSON-structured generation.
 11 | - [Q&A with Citations](qa-with-citations.md): Answer questions and provide citations using JSON-structured generation.
 12 | - [Knowledge Graph Generation](knowledge_graph_extraction.md): Generate a Knowledge Graph from unstructured text using JSON-structured generation.
 13 | - [Chain Of Thought (CoT)](chain_of_thought.md): Generate a series of intermediate reasoning steps using regex-structured generation.
 14 | - [ReAct Agent](react_agent.md): Build an agent with open weights models using regex-structured generation.
 15 | - [Earnings reports to CSV](earnings-reports.md): Extract data from earnings reports to CSV using regex-structured generation.
 16 | - [Vision-Language Models](atomic_caption.md): Use Outlines with vision-language models for tasks like image captioning and visual reasoning.
 17 | - [Receipt Digitization](receipt-digitization.md): Extract information from a picture of a receipt using structured generation.
 18 | - [Structured Generation from PDFs](read-pdfs.md): Use Outlines with vision-language models to read PDFs and produce structured output.
 19 | 
-------------------------------------------------------------------------------- /docs/cookbook/models_playing_chess.md: --------------------------------------------------------------------------------
 1 | # Large language models playing chess
 2 | 
 3 | In this example we will make a Phi-2 model play chess against itself. On its own the model easily generates invalid moves, so we will give it a little help. At each step we will generate a regex that only matches valid moves, and use it to help the model generate only valid moves.
 4 | 
 5 | ## The chessboard
 6 | 
 7 | The game will be played on a standard chessboard. We will use the `chess` [library](https://github.com/niklasf/python-chess) to track the opponents' moves, and check that the moves are valid.
 8 | 
 9 | ```python
 10 | %pip install outlines -q
 11 | %pip install chess -q
 12 | %pip install transformers accelerate einops -q
 13 | 
 14 | import chess
 15 | 
 16 | board = chess.Board("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1")
 17 | ```
 18 | 
 19 | ## The opponents
 20 | 
 21 | Phi-2 will be playing against itself:
 22 | 
 23 | ```python
 24 | from outlines import models
 25 | 
 26 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 27 | 
 28 | ```
 29 | 
 30 | ## A little help for the language model
 31 | 
 32 | To make sure Phi-2 generates valid chess moves we will use Outlines' regex-structured generation.
We define a function that takes the current state of the board and returns a regex that matches all possible legal moves: 33 | 34 | ```python 35 | import re 36 | 37 | def legal_moves_regex(board): 38 | """Build a regex that only matches valid moves.""" 39 | legal_moves = list(board.legal_moves) 40 | legal_modes_str = [board.san(move) for move in legal_moves] 41 | legal_modes_str = [re.sub(r"[+#]", "", move) for move in legal_modes_str] 42 | regex_pattern = "|".join(re.escape(move) for move in legal_modes_str) 43 | regex_pattern = f"{regex_pattern}" 44 | return regex_pattern 45 | ``` 46 | 47 | ## Prompting the language model 48 | 49 | The prompt corresponds to the current state of the board, so we start with: 50 | 51 | ```python 52 | prompt = "Let's play Chess. Moves: " 53 | 54 | ``` 55 | 56 | We update the prompt at each step so it reflects the state of the board after the previous move. 57 | 58 | ## Let's play 59 | 60 | ```python 61 | from outlines import generate 62 | 63 | board_state = " " 64 | turn_number = 0 65 | while not board.is_game_over(): 66 | regex_pattern = legal_moves_regex(board) 67 | structured = generate.regex(model, regex_pattern)(prompt + board_state) 68 | move = board.parse_san(structured) 69 | 70 | if turn_number % 2 == 0 : # It's White's turn 71 | board_state += board.san(move) + " " 72 | else: 73 | board_state += board.san(move) + " " + str(turn_number) + "." 74 | 75 | turn_number += 1 76 | 77 | board.push(move) 78 | 79 | print(board_state) 80 | ``` 81 | 82 | Interestingly enough, Phi-2 hates capturing. 83 | 84 | ```pgn 85 | e4 e5 1.Nf3 Ne7 3.b4 Nf5 5.Nc3 Ne7 7.Bb5 a6 9.Na4 b6 11.c3 Nec6 13.c4 a5 15.d4 Qg5 17.Nd2 Bb7 19.dxe5 86 | ``` 87 | 88 | *This example was originally authored by [@903124S](https://x.com/903124S) in [this gist](https://gist.github.com/903124/cfbefa24da95e2316e0d5e8ef8ed360d).* 89 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Outlines 3 | template: home.html # Note that this is managed in overrides/home.html 4 | hide: 5 | - navigation 6 | - toc 7 | - feedback 8 | --- 9 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Installation 3 | --- 4 | 5 | # Installation 6 | 7 | You can install Outlines with `pip`: 8 | 9 | ```sh 10 | pip install outlines 11 | ``` 12 | 13 | Outlines supports OpenAI, Transformers, Mamba, llama.cpp, and ExLlamaV2, but **you will need to install them manually**: 14 | 15 | ```sh 16 | pip install openai 17 | pip install transformers datasets accelerate torch 18 | pip install llama-cpp-python 19 | pip install exllamav2 transformers torch 20 | pip install mamba_ssm transformers torch 21 | pip install vllm 22 | ``` 23 | 24 | If you encounter any problems using Outlines with these libraries, take a look at their installation instructions. The installation of `openai` and `transformers` should be straightforward, but other libraries have specific hardware requirements. 25 | 26 | ## Optional Dependencies 27 | 28 | Outlines provides multiple optional dependency sets to support different backends and use cases. You can install them as needed using: 29 | 30 | - `pip install "outlines[vllm]"` for [vLLM](https://github.com/vllm-project/vllm), optimized for high-throughput inference. 
31 | - `pip install "outlines[transformers]"` for [Hugging Face Transformers](https://huggingface.co/docs/transformers/index). 32 | - `pip install "outlines[mlx]"` for [MLX-LM](https://github.com/ml-explore/mlx-lm), optimized for Apple silicon. 33 | - `pip install "outlines[openai]"` to use OpenAI’s API. 34 | - `pip install "outlines[llamacpp]"` for [llama.cpp](https://github.com/ggerganov/llama.cpp), a lightweight LLM inference engine. 35 | - `pip install "outlines[exllamav2]"` for [ExLlamaV2](https://github.com/turboderp/exllamav2), optimized for NVIDIA GPUs. 36 | 37 | ## Bleeding Edge 38 | 39 | You can install the latest version of Outlines from the repository's `main` branch: 40 | 41 | ```sh 42 | pip install git+https://github.com/dottxt-ai/outlines.git@main 43 | ``` 44 | 45 | This can be useful, for instance, when a fix has been merged but not yet released. 46 | 47 | ## Installing for Development 48 | 49 | See the [contributing documentation](community/contribute.md) for instructions on how to install Outlines for development, including an example using the `dot-install` method for one of the backends. 50 | -------------------------------------------------------------------------------- /docs/licence.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Licence 3 | --- 4 | 5 | # Licence and citations 6 | 7 | Outlines is licenced under the Apache 2.0 licence. To comply with the licence you need to add the following notice at the top every file that uses part of Outlines' code: 8 | 9 | ``` 10 | Copyright 2023- The Outlines developers 11 | 12 | Licensed under the Apache License, Version 2.0 (the "License"); 13 | you may not use this file except in compliance with the License. 14 | You may obtain a copy of the License at 15 | 16 | http://www.apache.org/licenses/LICENSE-2.0 17 | 18 | Unless required by applicable law or agreed to in writing, software 19 | distributed under the License is distributed on an "AS IS" BASIS, 20 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21 | See the License for the specific language governing permissions and 22 | limitations under the License. 
23 | ``` 24 | 25 | If you use Outlines in your work you can use the following citation: 26 | 27 | ``` 28 | @article{willard2023efficient, 29 | title={Efficient Guided Generation for LLMs}, 30 | author={Willard, Brandon T and Louf, R{\'e}mi}, 31 | journal={arXiv preprint arXiv:2307.09702}, 32 | year={2023} 33 | } 34 | ``` 35 | -------------------------------------------------------------------------------- /docs/logos/amazon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/amazon.png -------------------------------------------------------------------------------- /docs/logos/apple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/apple.png -------------------------------------------------------------------------------- /docs/logos/best_buy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/best_buy.png -------------------------------------------------------------------------------- /docs/logos/canoe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/canoe.png -------------------------------------------------------------------------------- /docs/logos/cisco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/cisco.png -------------------------------------------------------------------------------- /docs/logos/dassault_systems.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/dassault_systems.png -------------------------------------------------------------------------------- /docs/logos/databricks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/databricks.png -------------------------------------------------------------------------------- /docs/logos/datadog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/datadog.png -------------------------------------------------------------------------------- /docs/logos/dbt_labs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/dbt_labs.png -------------------------------------------------------------------------------- /docs/logos/gladia.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/gladia.jpg -------------------------------------------------------------------------------- /docs/logos/harvard.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/harvard.png -------------------------------------------------------------------------------- /docs/logos/hf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/hf.png -------------------------------------------------------------------------------- /docs/logos/johns_hopkins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/johns_hopkins.png -------------------------------------------------------------------------------- /docs/logos/meta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/meta.png -------------------------------------------------------------------------------- /docs/logos/mit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/mit.png -------------------------------------------------------------------------------- /docs/logos/mount_sinai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/mount_sinai.png -------------------------------------------------------------------------------- /docs/logos/nvidia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/nvidia.png -------------------------------------------------------------------------------- /docs/logos/nyu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/nyu.png -------------------------------------------------------------------------------- /docs/logos/safran.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/safran.png -------------------------------------------------------------------------------- /docs/logos/salesforce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/salesforce.png -------------------------------------------------------------------------------- /docs/logos/shopify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/shopify.png -------------------------------------------------------------------------------- /docs/logos/smithsonian.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/smithsonian.png 
-------------------------------------------------------------------------------- /docs/logos/tinder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/tinder.png -------------------------------------------------------------------------------- /docs/logos/upenn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/upenn.png -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | -------------------------------------------------------------------------------- /docs/reference/chat_templating.md: -------------------------------------------------------------------------------- 1 | # Chat templating 2 | 3 | Instruction-tuned language models use "special tokens" to indicate different parts of text, such as the system prompt, the user prompt, any images, and the assistant's response. A [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating) is how different types of input are composited together into a single, machine-readable string. 4 | 5 | Outlines does not manage chat templating tokens when using instruct models. You must apply the chat template tokens to the prompt yourself -- if you do not apply chat templating on instruction-tuned models, you will often get nonsensical output from the model. 6 | 7 | Chat template tokens are not needed for base models. 8 | 9 | You can find the chat template tokens in the model's HuggingFace repo or documentation. As an example, the `SmolLM2-360M-Instruct` special tokens can be found [here](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct/blob/main/special_tokens_map.json). 10 | 11 | However, it can be slow to manually look up a model's special tokens, and special tokens vary by models. If you change the model, your prompts may break if you have hard-coded special tokens. 
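To make the problem concrete, here is a minimal sketch of what manually hard-coding the ChatML-style special tokens used by `SmolLM2-360M-Instruct` looks like. The prompt string below is hand-written, which is exactly what breaks when you switch to a model with a different template:

```python
import outlines

model = outlines.models.transformers("HuggingFaceTB/SmolLM2-360M-Instruct")
generator = outlines.generate.text(model)

# Special tokens hard-coded for SmolLM2's ChatML-style chat template.
prompt = (
    "<|im_start|>system\n"
    "You extract information from text.<|im_end|>\n"
    "<|im_start|>user\n"
    "What food does the following text describe?<|im_end|>\n"
    "<|im_start|>assistant\n"
)

answer = generator(prompt)
```
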
 12 | 
 13 | If you need a convenient tool to apply chat templating for you, use the `tokenizer` from the `transformers` library instead:
 14 | 
 15 | ```python
 16 | from transformers import AutoTokenizer
 17 | 
 18 | tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
 19 | prompt = tokenizer.apply_chat_template(
 20 |     [
 21 |         {"role": "system", "content": "You extract information from text."},
 22 |         {"role": "user", "content": "What food does the following text describe?"},
 23 |     ],
 24 |     tokenize=False,
 25 |     add_bos=True,
 26 |     add_generation_prompt=True,
 27 | )
 28 | ```
 29 | 
 30 | yields
 31 | 
 32 | ```
 33 | <|im_start|>system
 34 | You extract information from text.<|im_end|>
 35 | <|im_start|>user
 36 | What food does the following text describe?<|im_end|>
 37 | <|im_start|>assistant
 38 | ```
 39 | 
-------------------------------------------------------------------------------- /docs/reference/generation/choices.md: --------------------------------------------------------------------------------
 1 | # Multiple choices
 2 | 
 3 | Outlines allows you to make sure the generated text is chosen from a set of different options:
 4 | 
 5 | ```python
 6 | from outlines import models, generate
 7 | 
 8 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 9 | generator = generate.choice(model, ["skirt", "dress", "pen", "jacket"])
 10 | answer = generator("Pick the odd word out: skirt, dress, pen, jacket")
 11 | 
 12 | ```
 13 | 
 14 | !!! Note "Performance"
 15 | 
 16 |     `generate.choice` computes an index that helps Outlines guide generation. This can take some time, but only needs to be done once. If you want to generate from the same list of choices several times make sure that you only call `generate.choice` once.
 17 | 
-------------------------------------------------------------------------------- /docs/reference/generation/custom_fsm_ops.md: --------------------------------------------------------------------------------
 1 | # Custom FSM Operations
 2 | 
 3 | Outlines is fast because it compiles regular expressions into an index ahead of inference. To do so we use the equivalence between regular expressions and Finite State Machines (FSMs), and the library [interegular](https://github.com/MegaIng/interegular) to perform the translation.
 4 | 
 5 | Alternatively, one can pass an FSM built using `interegular` directly to structure the generation.
 6 | 
 7 | ## Example
 8 | 
 9 | ### Using the `difference` operation
 10 | 
 11 | In the following example we build an FSM that accepts only the strings matched by the first regular expression but not by the second.
In particular, it will prevent the words "pink" and "elephant" from being generated:
 12 | 
 13 | ```python
 14 | import interegular
 15 | from outlines import models, generate
 16 | 
 17 | 
 18 | list_of_strings_pattern = """\["[^"\s]*"(?:,"[^"\s]*")*\]"""
 19 | pink_elephant_pattern = """.*(pink|elephant).*"""
 20 | 
 21 | list_of_strings_fsm = interegular.parse_pattern(list_of_strings_pattern).to_fsm()
 22 | pink_elephant_fsm = interegular.parse_pattern(pink_elephant_pattern).to_fsm()
 23 | 
 24 | difference_fsm = list_of_strings_fsm - pink_elephant_fsm
 25 | 
 26 | difference_fsm.accepts('["a","pink","elephant"]')
 27 | # False
 28 | difference_fsm.accepts('["a","blue","donkey"]')
 29 | # True
 30 | 
 31 | 
 32 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 33 | generator = generate.fsm(model, difference_fsm)
 34 | response = generator("Don't talk about pink elephants")
 35 | ```
 36 | 
 37 | To see the other operations available, consult [interegular's documentation](https://github.com/MegaIng/interegular/blob/master/interegular/fsm.py).
 38 | 
-------------------------------------------------------------------------------- /docs/reference/generation/format.md: --------------------------------------------------------------------------------
 1 | # Type constraints
 2 | 
 3 | We can ask completions to be restricted to valid Python types:
 4 | 
 5 | ```python
 6 | from outlines import models, generate
 7 | 
 8 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 9 | generator = generate.format(model, int)
 10 | answer = generator("When I was 6 my sister was half my age. Now I’m 70 how old is my sister?")
 11 | print(answer)
 12 | # 67
 13 | ```
 14 | 
 15 | The following types are currently available:
 16 | 
 17 | - int
 18 | - float
 19 | - bool
 20 | - datetime.date
 21 | - datetime.time
 22 | - datetime.datetime
 23 | - We also provide [custom types](types.md)
 24 | 
-------------------------------------------------------------------------------- /docs/reference/generation/regex.md: --------------------------------------------------------------------------------
 1 | # Regular expressions
 2 | 
 3 | Outlines can guarantee that the text generated by the LLM will match a regular expression:
 4 | 
 5 | ```python
 6 | from outlines import models, generate
 7 | 
 8 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 9 | 
 10 | generator = generate.regex(
 11 |     model,
 12 |     r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)",
 13 | )
 14 | 
 15 | prompt = "What is the IP address of the Google DNS servers? "
 16 | answer = generator(prompt, max_tokens=30)
 17 | 
 18 | print(answer)
 19 | # What is the IP address of the Google DNS servers?
 20 | # 2.2.6.1
 21 | ```
 22 | 
 23 | If you find yourself using `generate.regex` to restrict the answers' type you can take a look at [type-structured generation](types.md) instead.
 24 | 
 25 | !!! Note "Performance"
 26 | 
 27 |     `generate.regex` computes an index that helps Outlines guide generation. This can take some time, but only needs to be done once. If you want to generate several times using the same regular expression make sure that you only call `generate.regex` once.
 28 | 
-------------------------------------------------------------------------------- /docs/reference/generation/structured_generation_explanation.md: --------------------------------------------------------------------------------
 1 | # How does Outlines work?
 2 | 
 3 | 
 4 | Language models generate text token by token, using the previous token sequence as input and producing logits from which the next token is sampled.
This document explains the structured generation process, where only legal tokens are considered for the next step based on a predefined automata, e.g. a regex-defined [finite-state machine](https://en.wikipedia.org/wiki/Finite-state_machine) (FSM) or [Lark](https://lark-parser.readthedocs.io/en/stable/) grammar.` 5 | 6 | 7 | ## Worked Example 8 | 9 | Let's consider a worked example with a pattern for whole and decimal numbers: 10 | 11 | `^\d*(\.\d+)?$`. 12 | 13 | ### Creating Automata 14 | 15 | The pattern is first converted into an automata. Below is a brief explanation of the automata conversion and its representation. 16 | 17 | **Automata Diagram:** 18 | 19 | ```mermaid 20 | graph LR 21 | node0("1-9") --> node1("1-9") 22 | node1 --> node1 23 | node1 --> nodeEND{{END}} 24 | node1 --> nodePeriod(".") 25 | nodePeriod --> node2("1-9") 26 | node2 --> node2 27 | node2 --> nodeEND{{END}} 28 | ``` 29 | 30 | ### Generating a Token 31 | 32 | Let's assume that we're in the middle of generation, and so far "748" has been generated. Here is the automata with the current state highlighted in green, with the legal next characters being another number (1-9), a dot (.), or end of sequence. 33 | 34 | ```mermaid 35 | graph LR 36 | node0("1-9") --> node1("1-9") 37 | node1 --> node1 38 | node1 --> nodeEND{{END}} 39 | node1 --> nodePeriod(".") 40 | nodePeriod --> node2("1-9") 41 | node2 --> node2 42 | node2 --> nodeEND{{END}} 43 | 44 | style node1 fill:#090 45 | ``` 46 | 47 | Generating a token requires the following steps: 48 | 49 | - Feed the previous input sequence ("748") into the language model. 50 | - Language model runs a forward pass and produces token logits. 51 | - Outlines logits processor sets the probability of illegal tokens to 0%. 52 | - A token is sampled from the set of legal tokens. 53 | 54 | ![Generation and Logits Processing Flow Chart](../../assets/images/logits_processing_diagram.svg) 55 | -------------------------------------------------------------------------------- /docs/reference/generation/types.md: -------------------------------------------------------------------------------- 1 | # Custom types 2 | 3 | Outlines provides custom Pydantic types so you can focus on your use case rather than on writing regular expressions: 4 | 5 | | Category | Type | Import | Description | 6 | |:--------:|:----:|:-------|:------------| 7 | | ISBN | 10 & 13 | `outlines.types.ISBN` | There is no guarantee that the [check digit][wiki-isbn] will be correct | 8 | | Airport | IATA | `outlines.types.airports.IATA` | Valid [airport IATA codes][wiki-airport-iata] | 9 | | Country | alpha-2 code | `outlines.types.airports.Alpha2` | Valid [country alpha-2 codes][wiki-country-alpha-2] | 10 | | | alpha-3 code | `outlines.types.countries.Alpha3` | Valid [country alpha-3 codes][wiki-country-alpha-3] | 11 | | | numeric code | `outlines.types.countries.Numeric` | Valid [country numeric codes][wiki-country-numeric] | 12 | | | name | `outlines.types.countries.Name` | Valid country names | 13 | | | flag | `outlines.types.countries.Flag` | Valid flag emojis | 14 | | | email | `outlines.types.Email` | Valid email address | 15 | 16 | Some types require localization. We currently only support US types, but please don't hesitate to create localized versions of the different types and open a Pull Request. 
Localized types are specified using `types.locale` in the following way:
 17 | 
 18 | ```python
 19 | from outlines import types
 20 | 
 21 | types.locale("us").ZipCode
 22 | types.locale("us").PhoneNumber
 23 | ```
 24 | 
 25 | Here are the localized types that are currently available:
 26 | 
 27 | | Category | Locale | Import | Description |
 28 | |:--------:|:----:|:-------|:------------|
 29 | | Zip code | US | `ZipCode` | Generate US Zip(+4) codes |
 30 | | Phone number | US | `PhoneNumber` | Generate valid US phone numbers |
 31 | 
 32 | 
 33 | You can use these types in Pydantic schemas for JSON-structured generation:
 34 | 
 35 | ```python
 36 | from pydantic import BaseModel
 37 | 
 38 | from outlines import models, generate, types
 39 | 
 40 | # Specify the locale for types
 41 | locale = types.locale("us")
 42 | 
 43 | class Client(BaseModel):
 44 |     name: str
 45 |     phone_number: locale.PhoneNumber
 46 |     zip_code: locale.ZipCode
 47 | 
 48 | 
 49 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 50 | generator = generate.json(model, Client)
 51 | result = generator(
 52 |     "Create a client profile with the fields name, phone_number and zip_code"
 53 | )
 54 | print(result)
 55 | # name='Tommy' phone_number='129-896-5501' zip_code='50766'
 56 | ```
 57 | 
 58 | Or simply with `outlines.generate.format`:
 59 | 
 60 | ```python
 61 | from pydantic import BaseModel
 62 | 
 63 | from outlines import models, generate, types
 64 | 
 65 | 
 66 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 67 | generator = generate.format(model, types.locale("us").PhoneNumber)
 68 | result = generator(
 69 |     "Return a US Phone number: "
 70 | )
 71 | print(result)
 72 | # 334-253-2630
 73 | ```
 74 | 
 75 | 
 76 | We plan on adding many more custom types. If you have found yourself writing regular expressions to generate fields of a given type, or if you could benefit from more specific types, don't hesitate to [submit a PR](https://github.com/dottxt-ai/outlines/pulls) or [open an issue](https://github.com/dottxt-ai/outlines/issues/new/choose).
 77 | 
 78 | 
 79 | [wiki-isbn]: https://en.wikipedia.org/wiki/ISBN#Check_digits
 80 | [wiki-airport-iata]: https://en.wikipedia.org/wiki/IATA_airport_code
 81 | [wiki-country-alpha-2]: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
 82 | [wiki-country-alpha-3]: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3
 83 | [wiki-country-numeric]: https://en.wikipedia.org/wiki/ISO_3166-1_numeric
 84 | 
-------------------------------------------------------------------------------- /docs/reference/index.md: --------------------------------------------------------------------------------
 1 | # Reference
 2 | 
 3 | ## Structured generation
 4 | 
 5 | While LLM capabilities are increasingly impressive, we can make their output more reliable by steering the generation. Outlines thus offers mechanisms to specify high-level constraints on text completions by generative language models.
 6 | 
 7 | ### Stopping sequence
 8 | By default, language models stop generating tokens after an end-of-sequence (EOS) token has been generated, or after a set maximum number of tokens. Their output can be verbose, and for practical purposes it is often necessary to stop the generation after a given sequence has been found instead.
You can use the stop_at keyword argument when calling the model with a prompt: 9 | 10 | ```python 11 | import outlines.models as models 12 | 13 | complete = models.openai("gpt-4o-mini") 14 | expert = complete("Name an expert in quantum gravity.", stop_at=["\n", "."]) 15 | ``` 16 | -------------------------------------------------------------------------------- /docs/reference/models/exllamav2.md: -------------------------------------------------------------------------------- 1 | # ExllamaV2 2 | 3 | The `outlines.models.exllamav2` model requires a Logits Processor component for compatibility with Outlines structured generation. While ExLlamaV2 doesn't natively support this feature, a third-party fork provides the necessary functionality. You can install it with the following command: 4 | 5 | ```bash 6 | pip install git+https://github.com/lapp0/exllamav2@sampler-logits-processor 7 | ``` 8 | 9 | Install other requirements: 10 | 11 | ```bash 12 | pip install transformers torch 13 | ``` 14 | 15 | *Coming soon* 16 | -------------------------------------------------------------------------------- /docs/reference/models/mlxlm.md: -------------------------------------------------------------------------------- 1 | # mlx-lm 2 | 3 | Outlines provides an integration with [mlx-lm](https://github.com/ml-explore/mlx-examples/tree/main/llms), allowing models to be run quickly on Apple Silicon via the [mlx](https://ml-explore.github.io/mlx/build/html/index.html) library. 4 | 5 | !!! Note "Installation" 6 | 7 | You need to install the `mlx` and `mlx-lm` libraries on a device which [supports Metal](https://support.apple.com/en-us/102894) to use the mlx-lm integration. To get started quickly you can also run: 8 | 9 | ```bash 10 | pip install "outlines[mlxlm]" 11 | ``` 12 | 13 | 14 | ## Load the model 15 | 16 | You can initialize the model by passing the name of the repository on the HuggingFace Hub. The official repository for mlx-lm supported models is [mlx-community](https://huggingface.co/mlx-community). 17 | 18 | ```python 19 | from outlines import models 20 | 21 | model = models.mlxlm("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit") 22 | ``` 23 | 24 | This will download the model files to the hub cache folder and load the weights in memory. 25 | 26 | The arguments `model_config` and `tokenizer_config` are available to modify loading behavior. For example, per the `mlx-lm` [documentation](https://github.com/ml-explore/mlx-examples/tree/main/llms#supported-models), you must set an eos_token for `qwen/Qwen-7B`. In outlines you may do so via 27 | 28 | ``` 29 | model = models.mlxlm( 30 | "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit", 31 | tokenizer_config={"eos_token": "<|endoftext|>", "trust_remote_code": True}, 32 | ) 33 | ``` 34 | 35 | **Main parameters:** 36 | 37 | (Subject to change. Table based on [mlx-lm.load docstring](https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/utils.py#L429)) 38 | 39 | | Parameters | Type | Description | Default | 40 | |--------------------|--------|--------------------------------------------------------------------------------------------------|---------| 41 | | `tokenizer_config` | `dict` | Configuration parameters specifically for the tokenizer. Defaults to an empty dictionary. | `{}` | 42 | | `model_config` | `dict` | Configuration parameters specifically for the model. Defaults to an empty dictionary. | `{}` | 43 | | `adapter_path` | `str` | Path to the LoRA adapters. If provided, applies LoRA layers to the model. 
| `None` |
 44 | | `lazy` | `bool` | If False, evaluate the model parameters to make sure they are loaded in memory before returning. | `False` |
 45 | 
 46 | 
 47 | ## Generate text
 48 | 
 49 | You may generate text using the parameters described in the [text generation documentation](../text.md).
 50 | 
 51 | With the loaded model, you can generate text or perform structured generation, e.g.:
 52 | 
 53 | ```python
 54 | from outlines import models, generate
 55 | 
 56 | model = models.mlxlm("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit")
 57 | generator = generate.text(model)
 58 | 
 59 | answer = generator("A prompt", temperature=2.0)
 60 | ```
 61 | 
 62 | ## Streaming
 63 | 
 64 | You may create a streaming iterable with minimal changes:
 65 | 
 66 | ```python
 67 | from outlines import models, generate
 68 | 
 69 | model = models.mlxlm("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit")
 70 | generator = generate.text(model)
 71 | 
 72 | for token_str in generator.stream("A prompt", temperature=2.0):
 73 |     print(token_str)
 74 | ```
 75 | 
 76 | ## Structured
 77 | 
 78 | You may perform structured generation with mlxlm to guarantee your output will match a regex pattern, JSON schema, or Lark grammar.
 79 | 
 80 | Example: Phone number generation with pattern `"\\+?[1-9][0-9]{7,14}"`:
 81 | 
 82 | ```python
 83 | from outlines import models, generate
 84 | 
 85 | model = models.mlxlm("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit")
 86 | 
 87 | phone_number_pattern = "\\+?[1-9][0-9]{7,14}"
 88 | generator = generate.regex(model, phone_number_pattern)
 89 | 
 90 | model_output = generator("What's Jenny's Number?\n")
 91 | print(model_output)
 92 | # '8675309'
 93 | ```
 94 | 
-------------------------------------------------------------------------------- /docs/reference/models/models.md: --------------------------------------------------------------------------------
 1 | ---
 2 | title: Models
 3 | ---
 4 | 
 5 | # Models
 6 | 
 7 | Outlines supports generation using a number of inference engines (`outlines.models`).
Loading a model using outlines follows a similar interface between inference engines: 8 | 9 | ```python 10 | import outlines 11 | 12 | model = outlines.models.transformers("microsoft/Phi-3-mini-128k-instruct") 13 | model = outlines.models.transformers_vision("llava-hf/llava-v1.6-mistral-7b-hf") 14 | model = outlines.models.vllm("microsoft/Phi-3-mini-128k-instruct") 15 | model = outlines.models.llamacpp( 16 | "microsoft/Phi-3-mini-4k-instruct-gguf", "Phi-3-mini-4k-instruct-q4.gguf" 17 | ) 18 | model = outlines.models.exllamav2("bartowski/Phi-3-mini-128k-instruct-exl2") 19 | model = outlines.models.mlxlm("mlx-community/Phi-3-mini-4k-instruct-4bit") 20 | 21 | model = outlines.models.openai( 22 | "gpt-4o-mini", 23 | api_key=os.environ["OPENAI_API_KEY"] 24 | ) 25 | ``` 26 | 27 | 28 | # Feature Matrix 29 | | | [Transformers](transformers.md) | [Transformers Vision](transformers_vision.md) | [vLLM](vllm.md) | [llama.cpp](llamacpp.md) | [ExLlamaV2](exllamav2.md) | [MLXLM](mlxlm.md) | [OpenAI](openai.md)* | 30 | |-------------------|--------------|---------------------|------|-----------|-----------|-------|---------| 31 | | **Device** | | | | | | | | 32 | | Cuda | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | N/A | 33 | | Apple Silicon | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | N/A | 34 | | x86 / AMD64 | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | N/A | 35 | | **Sampling** | | | | | | | | 36 | | Greedy | ✅ | ✅ | ✅ | ✅* | ✅ | ✅ | ❌ | 37 | | Multinomial | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 38 | | Multiple Samples | ✅ | ✅ | | ❌ | | ❌ | ✅ | 39 | | Beam Search | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | 40 | | **Generation** | | | | | | | | 41 | | Batch | ✅ | ✅ | ✅ | ❌ | ? | ❌ | ❌ | 42 | | Stream | ✅ | ❌ | ❌ | ✅ | ? | ✅ | ❌ | 43 | | Text | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 44 | | **Structured** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 45 | | JSON Schema | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 46 | | Choice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 47 | | Regex | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | 48 | | Grammar | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | 49 | 50 | 51 | ## Caveats 52 | 53 | - OpenAI doesn't support structured generation due to limitations in their API and server implementation. 54 | - `outlines.generate` ["Structured"](../generation/generation.md) includes methods such as `outlines.generate.regex`, `outlines.generate.json`, `outlines.generate.cfg`, etc. 55 | - MLXLM only supports Apple Silicon. 56 | - llama.cpp greedy sampling available via multinomial with `temperature = 0.0`. 57 | -------------------------------------------------------------------------------- /docs/reference/models/tgi.md: -------------------------------------------------------------------------------- 1 | # Text-generation-inference (TGI) 2 | 3 | TGI uses Outlines to provide structured generation, see [their documentation](https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/using_guidance). 4 | -------------------------------------------------------------------------------- /docs/reference/models/transformers_vision.md: -------------------------------------------------------------------------------- 1 | # Transformers Vision 2 | 3 | Outlines allows seamless use of [vision models](https://huggingface.co/learn/computer-vision-course/en/unit4/multimodal-models/tasks-models-part1). 4 | 5 | `outlines.models.transformers_vision` shares interfaces with, and is based on [outlines.models.transformers](./transformers.md). 
 6 | 
 7 | Tasks supported include
 8 | 
 9 | - image + text -> text
 10 | - video + text -> text
 11 | 
 12 | 
 13 | 
 14 | ## Example: Using [Llava-Next](https://huggingface.co/docs/transformers/en/model_doc/llava_next) Vision Models
 15 | 
 16 | Install dependencies:
 17 | `pip install torchvision pillow flash-attn`
 18 | 
 19 | Create the model:
 20 | ```python
 21 | import outlines
 22 | from transformers import LlavaNextForConditionalGeneration
 23 | 
 24 | model = outlines.models.transformers_vision(
 25 |     "llava-hf/llava-v1.6-mistral-7b-hf",
 26 |     model_class=LlavaNextForConditionalGeneration,
 27 |     device="cuda",
 28 | )
 29 | ```
 30 | 
 31 | Create a convenience function to load a `PIL.Image` from a URL:
 32 | ```python
 33 | from PIL import Image
 34 | from io import BytesIO
 35 | from urllib.request import urlopen
 36 | 
 37 | def img_from_url(url):
 38 |     img_byte_stream = BytesIO(urlopen(url).read())
 39 |     return Image.open(img_byte_stream).convert("RGB")
 40 | ```
 41 | 
 42 | ### Describing an image
 43 | 
 44 | ```python
 45 | description_generator = outlines.generate.text(model)
 46 | description_generator(
 47 |     "<image> detailed description:",
 48 |     [img_from_url("https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg")]
 49 | )
 50 | ```
 51 | 
 52 | > This is a color photograph featuring a Siamese cat with striking blue eyes. The cat has a creamy coat and a light eye color, which is typical for the Siamese breed. Its features include elongated ears, a long, thin tail, and a striking coat pattern. The cat is sitting in an indoor setting, possibly on a cat tower or a similar raised platform, which is covered with a beige fabric, providing a comfortable and soft surface for the cat to rest or perch. The surface of the wall behind the cat appears to be a light-colored stucco or plaster.
 53 | 
 54 | #### Multiple Images
 55 | 
 56 | To include multiple images in your prompt you simply add more `<image>` tokens to the prompt:
 57 | 
 58 | ```python
 59 | image_urls = [
 60 |     "https://cdn1.byjus.com/wp-content/uploads/2020/08/ShapeArtboard-1-copy-3.png",  # triangle
 61 |     "https://cdn1.byjus.com/wp-content/uploads/2020/08/ShapeArtboard-1-copy-11.png",  # hexagon
 62 | ]
 63 | description_generator = outlines.generate.text(model)
 64 | description_generator(
 65 |     "<image><image>What shapes are present?",
 66 |     list(map(img_from_url, image_urls)),
 67 | )
 68 | ```
 69 | 
 70 | > There are two shapes present. One shape is a hexagon and the other shape is an triangle.
 71 | 
 72 | 
 73 | ### Classifying an Image
 74 | 
 75 | ```python
 76 | pattern = "Mercury|Venus|Earth|Mars|Saturn|Jupiter|Neptune|Uranus|Pluto"
 77 | planet_generator = outlines.generate.regex(model, pattern)
 78 | 
 79 | planet_generator(
 80 |     "What planet is this: <image>",
 81 |     [img_from_url("https://upload.wikimedia.org/wikipedia/commons/e/e3/Saturn_from_Cassini_Orbiter_%282004-10-06%29.jpg")]
 82 | )
 83 | ```
 84 | 
 85 | > Saturn
 86 | 
 87 | 
 88 | ### Extracting Structured Image data
 89 | 
 90 | ```python
 91 | from pydantic import BaseModel
 92 | from typing import List, Optional
 93 | 
 94 | class ImageData(BaseModel):
 95 |     caption: str
 96 |     tags_list: List[str]
 97 |     object_list: List[str]
 98 |     is_photo: bool
 99 | 
 100 | image_data_generator = outlines.generate.json(model, ImageData)
 101 | 
 102 | image_data_generator(
 103 |     "<image> detailed JSON metadata:",
 104 |     [img_from_url("https://upload.wikimedia.org/wikipedia/commons/9/98/Aldrin_Apollo_11_original.jpg")]
 105 | )
 106 | ```
 107 | 
 108 | > `ImageData(caption='An astronaut on the moon', tags_list=['moon', 'space', 'nasa', 'americanflag'], object_list=['moon', 'moon_surface', 'space_suit', 'americanflag'], is_photo=True)`
 109 | 
 110 | 
 111 | ## Resources
 112 | 
 113 | ### Choosing a model
 114 | - https://mmbench.opencompass.org.cn/leaderboard
 115 | - https://huggingface.co/spaces/WildVision/vision-arena
 116 | 
-------------------------------------------------------------------------------- /docs/reference/serve/lmstudio.md: --------------------------------------------------------------------------------
 1 | # Serve with LM Studio
 2 | 
 3 | !!! tip "Would rather not self-host?"
 4 | 
 5 |     If you want to get started quickly with JSON-structured generation you can call instead [.json](https://h1xbpbfsf0w.typeform.com/to/ZgBCvJHF), a [.txt](http://dottxt.co) API that guarantees valid JSON.
 6 | 
 7 | [LM Studio](https://lmstudio.ai/) is an application that runs local LLMs. It flexibly mixes GPU and CPU compute in hardware-constrained environments.
 8 | 
 9 | As of [LM Studio 0.3.4](https://lmstudio.ai/blog/lmstudio-v0.3.4), it natively supports Outlines for structured text generation, using an OpenAI-compatible endpoint.
 10 | 
 11 | ## Setup
 12 | 
 13 | 1. Install LM Studio by visiting their [downloads page](https://lmstudio.ai/download).
 14 | 2. Enable the LM Studio [server functionality](https://lmstudio.ai/docs/basics/server).
 15 | 3. Download [a model](https://lmstudio.ai/docs/basics#1-download-an-llm-to-your-computer).
 16 | 4. Install Python dependencies.
 17 | ```bash
 18 | pip install pydantic openai
 19 | ```
 20 | 
 21 | ## Calling the server
 22 | 
 23 | By default, LM Studio will serve from `http://localhost:1234`. If you are serving on a different port or host, make sure to change the `base_url` argument in `OpenAI` to the relevant location.
 24 | 
 25 | ```python
 26 | class Testing(BaseModel):
 27 |     """
 28 |     A class representing a testing schema.
 29 |     """
 30 |     name: str
 31 |     age: int
 32 | 
 33 | openai_client = openai.OpenAI(
 34 |     base_url="http://0.0.0.0:1234/v1",
 35 |     api_key="dopeness"
 36 | )
 37 | 
 38 | # Make a request to the local LM Studio server
 39 | response = openai_client.beta.chat.completions.parse(
 40 |     model="hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF",
 41 |     messages=[
 42 |         {"role": "system", "content": "You are like so good at whatever you do."},
 43 |         {"role": "user", "content": "My name is Cameron and I am 28 years old.
What's my name and age?"} 44 | ], 45 | response_format=Testing 46 | ) 47 | ``` 48 | 49 | You should receive a `ParsedChatCompletion[Testing]` object back: 50 | 51 | ```python 52 | ParsedChatCompletion[Testing]( 53 | id='chatcmpl-3hykyf0fxus7jc90k6gwlw', 54 | choices=[ 55 | ParsedChoice[Testing]( 56 | finish_reason='stop', 57 | index=0, 58 | logprobs=None, 59 | message=ParsedChatCompletionMessage[Testing]( 60 | content='{ "age": 28, "name": "Cameron" }', 61 | refusal=None, 62 | role='assistant', 63 | function_call=None, 64 | tool_calls=[], 65 | parsed=Testing(name='Cameron', age=28) 66 | ) 67 | ) 68 | ], 69 | created=1728595622, 70 | model='lmstudio-community/Phi-3.1-mini-128k-instruct-GGUF/Phi-3.1-mini-128k-instruct-Q4_K_M.gguf', 71 | object='chat.completion', 72 | service_tier=None, 73 | system_fingerprint='lmstudio-community/Phi-3.1-mini-128k-instruct-GGUF/Phi-3.1-mini-128k-instruct- 74 | Q4_K_M.gguf', 75 | usage=CompletionUsage( 76 | completion_tokens=17, 77 | prompt_tokens=47, 78 | total_tokens=64, 79 | completion_tokens_details=None, 80 | prompt_tokens_details=None 81 | ) 82 | ) 83 | ``` 84 | 85 | You can retrieve your `Testing` object with 86 | 87 | ```python 88 | response.choices[0].message.parsed 89 | ``` 90 | -------------------------------------------------------------------------------- /docs/reference/serve/vllm.md: -------------------------------------------------------------------------------- 1 | # Serve with vLLM 2 | 3 | !!! tip "Would rather not self-host?" 4 | 5 | If you want to get started quickly with JSON-structured generation you can call instead [.json](https://h1xbpbfsf0w.typeform.com/to/ZgBCvJHF), a [.txt](http://dottxt.co) API that guarantees valid JSON. 6 | 7 | Outlines can be deployed as an LLM service using the vLLM inference engine and a FastAPI server. vLLM is not installed by default so will need to install Outlines with: 8 | 9 | ```bash 10 | pip install outlines[serve] 11 | ``` 12 | 13 | You can then start the server with: 14 | 15 | ```bash 16 | python -m outlines.serve.serve --model="microsoft/Phi-3-mini-4k-instruct" 17 | ``` 18 | 19 | This will by default start a server at `http://127.0.0.1:8000` (check what the console says, though). Without the `--model` argument set, the OPT-125M model is used. The `--model` argument allows you to specify any model of your choosing. 20 | 21 | To run inference on multiple GPUs you must pass the `--tensor-parallel-size` argument when initializing the server. For instance, to run inference on 2 GPUs: 22 | 23 | 24 | ```bash 25 | python -m outlines.serve.serve --model="microsoft/Phi-3-mini-4k-instruct" --tensor-parallel-size 2 26 | ``` 27 | 28 | 29 | ### Alternative Method: Via Docker 30 | 31 | You can install and run the server with Outlines' official Docker image using the command 32 | 33 | ```bash 34 | docker run -p 8000:8000 outlinesdev/outlines --model="microsoft/Phi-3-mini-4k-instruct" 35 | ``` 36 | 37 | ## Querying Endpoint 38 | 39 | You can then query the model in shell by passing a prompt and either 40 | 41 | 1. a [JSON Schema][jsonschema]{:target="_blank"} specification or 42 | 2. a [Regex][regex]{:target="_blank"} pattern 43 | 44 | with the `schema` or `regex` parameters, respectively, to the `/generate` endpoint. If both are specified, the schema will be used. If neither is specified, the generated text will be unconstrained. 
45 | 46 | For example, to generate a string that matches the schema `{"type": "string"}` (any string): 47 | 48 | ```bash 49 | curl http://127.0.0.1:8000/generate \ 50 | -d '{ 51 | "prompt": "What is the capital of France?", 52 | "schema": {"type": "string", "maxLength": 5} 53 | }' 54 | ``` 55 | 56 | To generate a string that matches the regex `(-)?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-][0-9]+)?` (a number): 57 | 58 | ```bash 59 | curl http://127.0.0.1:8000/generate \ 60 | -d '{ 61 | "prompt": "What is Pi? Give me the first 15 digits: ", 62 | "regex": "(-)?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][+-][0-9]+)?" 63 | }' 64 | ``` 65 | 66 | Instead of `curl`, you can also use the [requests][requests]{:target="_blank"} library from another python program. 67 | 68 | Please consult the [vLLM documentation][vllm]{:target="_blank"} for details on additional request parameters. You can also [read the code](https://github.com/dottxt-ai/outlines/blob/main/outlines/serve/serve.py) in case you need to customize the solution to your needs. 69 | 70 | [requests]: https://requests.readthedocs.io/en/latest/ 71 | [vllm]: https://docs.vllm.ai/en/latest/index.html 72 | [jsonschema]: https://json-schema.org/learn/getting-started-step-by-step 73 | [regex]: https://www.regular-expressions.info/tutorial.html 74 | -------------------------------------------------------------------------------- /docs/reference/text.md: -------------------------------------------------------------------------------- 1 | # Text generation 2 | 3 | Outlines provides a unified interface to generate text with many language models, API-based and local. The same pattern is used throughout the library: 4 | 5 | 1. Instantiate a generator by calling `outlines.generate.text` with the model to be used. 6 | 2. Call the generator with the prompt and (optionally) some generation parameters. 7 | 8 | 9 | ```python 10 | from outlines import models, generate 11 | 12 | model = models.openai("gpt-4o-mini") 13 | generator = generate.text(model) 14 | answer = generator("What is 2+2?") 15 | 16 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct") 17 | generator = generate.text(model) 18 | answer = generator("What is 2+2?") 19 | ``` 20 | 21 | By default Outlines uses the multinomial sampler with `temperature=1`. See [this section](samplers.md) to learn how to use different samplers. 22 | 23 | ## Streaming 24 | 25 | Outlines allows you to stream the model's response by calling the `.stream` method of the generator with the prompt: 26 | 27 | 28 | ```python 29 | from outlines import models, generate 30 | 31 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct") 32 | generator = generate.text(model) 33 | 34 | tokens = generator.stream("What is 2+2?") 35 | for token in tokens: 36 | print(token) 37 | ``` 38 | 39 | ## Parameters 40 | 41 | ### Limit the number of tokens generated 42 | 43 | To limit the number of tokens generated you can pass the `max_tokens` positional argument to the generator: 44 | 45 | ```python 46 | from outlines import models, generate 47 | 48 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct") 49 | generator = generate.text(model) 50 | 51 | answer = generator("What is 2+2?", 5) 52 | answer = generator("What is 2+2?", max_tokens=5) 53 | ``` 54 | 55 | ### Stop after a given string is generated 56 | 57 | You can also ask the model to stop generating text after a given string has been generated, for instance a period or a line break. 
You can pass a string or a list of strings for the `stop_at` argument:
58 |
59 |
60 | ```python
61 | from outlines import models, generate
62 |
63 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
64 | generator = generate.text(model)
65 |
66 | answer = generator("What is 2+2?", stop_at=".")
67 | answer = generator("What is 2+2?", stop_at=[".", "\n"])
68 | ```
69 |
70 | *The stopping string will be included in the response.*
71 |
72 |
73 | ### Seed the generation
74 |
75 | It can be useful to seed the generation in order to get reproducible results:
76 |
77 | ```python
78 | import torch
79 | from outlines import models, generate
80 |
81 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
82 | generator = generate.text(model)
83 | seed = 789001
84 |
85 | answer = generator("What is 2+2?", seed=seed)
86 | ```
87 | -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: --------------------------------------------------------------------------------
1 | @font-face {
2 | font-family: 'Source Code Pro Custom', monospace;
3 | src: url(https://fonts.googleapis.com/css2?family=Source+Code+Pro:ital,wght@0,200..900;1,200..900&display=swap);
4 | }
5 |
6 | :root > * {
7 | --md-default-bg-color: #FFFFFF;
8 | --md-code-bg-color: #2E3440;
9 | --md-code-fg-color: #FFFFFF;
10 | --md-text-font-family: "Inter";
11 | --md-code-font: "Source Code Pro Custom";
12 | --md-typeset-a-color: #d16626; /*this is the brand color*/
13 |
14 | /* don't inherit white fg color for mermaid diagrams from --md-code-fg-color */
15 | --md-mermaid-label-fg-color: #000000;
16 | --md-mermaid-edge-color: #000000;
17 | }
18 |
19 | .index-pre-code {
20 | max-width: 700px;
21 | left: 50%;
22 | }
23 |
24 | .index-pre-code pre>code {
25 | text-align: left;
26 | }
27 |
28 | .md-clipboard::after {
29 | color: #FFFFFF;
30 | transition: color 0.3s ease-in-out;
31 | }
32 |
33 | .md-clipboard:hover::after {
34 | color: #D8DEE9;
35 | }
36 |
37 | .md-source-file {
38 | text-align: center;
39 | padding: 24px 0;
40 | }
41 |
42 | .md-typeset pre>code {
43 | border-radius: .2rem;
44 | box-shadow: 10px 5px 5px #D8DEE9;
45 | }
46 |
47 | .md-typeset p > code {
48 | background: #ECEFF4;
49 | color: #000000;
50 | font-weight: 500;
51 | }
52 |
53 | .md-typeset strong > code {
54 | background: #ECEFF4;
55 | color: #000000;
56 | font-weight: 500;
57 | }
58 |
59 | .md-content p > code {
60 | background: #ECEFF4;
61 | color: #000000;
62 | font-weight: 500;
63 | }
64 |
65 | .md-typeset td > code {
66 | background: #ECEFF4;
67 | color: #000000;
68 | font-weight: 500;
69 | }
70 |
71 | .md-typeset li > code {
72 | background: #ECEFF4;
73 | color: #000000;
74 | font-weight: 500;
75 | }
76 |
77 | .md-typeset code {
78 | font-weight: 500;
79 | }
80 |
81 | .md-typeset pre {
82 | margin-left: .5rem;
83 | margin-right: .5rem;
84 | margin-top: 2rem;
85 | margin-bottom: 2rem;
86 | }
87 |
88 | .language-python {
89 | background: #FFFFFF ! important
90 | }
91 |
92 | .language-bash {
93 | background: #FFFFFF ! important
94 | }
95 |
96 | .language-toml {
97 | background: #FFFFFF ! important
98 | }
99 |
100 | .language-text {
101 | background: #FFFFFF ! important
102 | }
103 |
104 | .language-json {
105 | background: #FFFFFF !
important 106 | } 107 | 108 | h1.title { 109 | color: #FFFFFF; 110 | margin: 0px 0px 5px; 111 | } 112 | 113 | h2.subtitle { 114 | margin: 5px 0px 25px; 115 | font-size: 1rem; 116 | max-width: 540px; 117 | margin: 0 auto; 118 | } 119 | 120 | .md-typeset { 121 | line-height: 24px; 122 | font-weight: 400; 123 | } 124 | 125 | .md-typeset h1 { 126 | font-weight: bold; 127 | color: #000000; 128 | } 129 | 130 | .md-typeset h2 { 131 | font-weight: bold; 132 | color: #000000; 133 | } 134 | 135 | span.md-ellipsis { 136 | color: black; 137 | } 138 | 139 | .md-nav__link--active { 140 | background-color: #ECEFF4; 141 | } 142 | 143 | .md-typeset ol li { 144 | margin-bottom: .1rem; 145 | } 146 | 147 | .md-typeset ul li { 148 | margin-bottom: .1rem; 149 | } 150 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | # To use: 2 | # 3 | # $ conda env create -f environment.yml # `mamba` works too for this command 4 | # $ conda activate dottxt-ai 5 | # 6 | name: dottxt-ai 7 | channels: 8 | - conda-forge 9 | - huggingface 10 | dependencies: 11 | - python==3.10.0 12 | - jinja2 13 | - numpy 14 | - pydantic 15 | - scipy 16 | - pytest 17 | - pre-commit 18 | - referencing 19 | - jsonschema 20 | - transformers 21 | - pip 22 | - pip: 23 | - -e ".[test]" 24 | -------------------------------------------------------------------------------- /examples/babyagi.py: -------------------------------------------------------------------------------- 1 | """This example is a simplified translation of BabyAGI. 2 | 3 | It currently does not use the vector store retrieval 4 | 5 | The original repo can be found at https://github.com/yoheinakajima/babyagi 6 | """ 7 | 8 | from collections import deque 9 | from typing import Deque, List 10 | 11 | import outlines 12 | import outlines.models as models 13 | from outlines import Template 14 | 15 | 16 | model = models.openai("gpt-4o-mini") 17 | complete = outlines.generate.text(model) 18 | 19 | ## Load the prompts 20 | perform_task_ppt = Template.from_file("prompts/babyagi_perform_task.txt") 21 | create_tasks_ppt = Template.from_file("prompts/babyagi_create_task.txt") 22 | prioritize_tasks_ppt = Template.from_file("prompts/babyagi_prioritize_task.txt") 23 | 24 | 25 | def create_tasks_fmt(result: str) -> List[str]: 26 | new_tasks = result.split("\n") 27 | 28 | task_list = [] 29 | for task in new_tasks: 30 | parts = task.strip().split(".", 1) 31 | if len(parts) == 2: 32 | task_list.append(parts[1].strip()) 33 | 34 | return task_list 35 | 36 | 37 | def prioritize_tasks_fmt(result: str): 38 | new_tasks = result.split("\n") 39 | 40 | task_list: Deque = deque([]) 41 | for task in new_tasks: 42 | parts = task.strip().split(".", 1) 43 | if len(parts) == 2: 44 | task_id = int(parts[0].strip()) 45 | task_name = parts[1].strip() 46 | task_list.append({"task_id": task_id, "task_name": task_name}) 47 | 48 | return task_list 49 | 50 | 51 | objective = "Becoming rich while doing nothing." 52 | first_task = { 53 | "task_id": 1, 54 | "task_name": "Find a repeatable, low-maintainance, scalable business.", 55 | } 56 | next_task_id = 1 57 | task_list = deque([first_task]) 58 | 59 | 60 | def one_cycle(objective: str, task_list, next_task_id: int): 61 | """One BabyAGI cycle. 62 | 63 | It consists in executing the highest-priority task, creating some new tasks 64 | given the result, and re-priotizing the tasks. 
65 | 66 | Parameters 67 | ---------- 68 | objective 69 | The overall objective of the session. 70 | task_list 71 | The current list of tasks to perform. 72 | task_id_counter 73 | The current task id. 74 | 75 | """ 76 | 77 | task = task_list.popleft() 78 | 79 | prompt = perform_task_ppt(objective, task) 80 | result = complete(prompt) 81 | 82 | prompt = create_tasks_ppt( 83 | objective, first_task["task_name"], result, [first_task["task_name"]] 84 | ) 85 | new_tasks = complete(prompt) 86 | 87 | new_tasks = create_tasks_fmt(new_tasks) 88 | 89 | for task in new_tasks: 90 | next_task_id += 1 91 | task_list.append({"task_id": next_task_id, "task_name": task}) 92 | 93 | prompt = prioritize_tasks_ppt( 94 | objective, [task["task_name"] for task in task_list], next_task_id 95 | ) 96 | prioritized_tasks = complete(prompt) 97 | 98 | prioritized_tasks = prioritize_tasks_fmt(prioritized_tasks) 99 | 100 | return task, result, prioritized_tasks, next_task_id 101 | 102 | 103 | # Let's run it for 5 cycles to see how it works without spending a fortune. 104 | for _ in range(5): 105 | print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m") 106 | for t in task_list: 107 | print(" • " + str(t["task_name"])) 108 | 109 | task, result, task_list, next_task_id = one_cycle( 110 | objective, task_list, next_task_id 111 | ) 112 | 113 | print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m") 114 | print(task) 115 | print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m") 116 | print(result) 117 | -------------------------------------------------------------------------------- /examples/beam-cloud/README.md: -------------------------------------------------------------------------------- 1 | ## Deploy Outlines on Beam 2 | 3 | 1. Create an account [here](https://beam.cloud) and install the Beam SDK 4 | 2. Download the `app.py` file to your computer 5 | 3. Deploy it as a serverless API by running: `beam deploy app.py:predict` 6 | -------------------------------------------------------------------------------- /examples/beam-cloud/app.py: -------------------------------------------------------------------------------- 1 | from beam import Image, endpoint, env 2 | 3 | if env.is_remote(): 4 | import outlines 5 | 6 | 7 | # Pre-load models when the container first starts 8 | def load_models(): 9 | import outlines 10 | 11 | model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct") 12 | return model 13 | 14 | 15 | @endpoint( 16 | name="outlines-serverless", 17 | gpu="A10G", 18 | cpu=1, 19 | memory="16Gi", 20 | on_start=load_models, 21 | image=Image().add_python_packages( 22 | ["outlines", "torch", "transformers", "accelerate"] 23 | ), 24 | ) 25 | def predict(context, **inputs): 26 | default_prompt = """You are a sentiment-labelling assistant. 27 | Is the following review positive or negative? 28 | 29 | Review: This restaurant is just awesome! 
30 | """ 31 | 32 | prompt = inputs.get("prompt", default_prompt) 33 | 34 | # Unpack cached model from context 35 | model = context.on_start_value 36 | # Inference 37 | generator = outlines.generate.choice(model, ["Positive", "Negative"]) 38 | answer = generator(prompt) 39 | return {"answer": answer} 40 | -------------------------------------------------------------------------------- /examples/bentoml/.bentoignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | .ipynb_checkpoints 5 | venv/ 6 | -------------------------------------------------------------------------------- /examples/bentoml/bentofile.yaml: -------------------------------------------------------------------------------- 1 | service: "service:Outlines" 2 | labels: 3 | owner: bentoml-team 4 | stage: demo 5 | include: 6 | - "*.py" 7 | python: 8 | requirements_txt: "./requirements.txt" 9 | lock_packages: false 10 | -------------------------------------------------------------------------------- /examples/bentoml/import_model.py: -------------------------------------------------------------------------------- 1 | import bentoml 2 | 3 | MODEL_ID = "mistralai/Mistral-7B-v0.1" 4 | BENTO_MODEL_TAG = MODEL_ID.lower().replace("/", "--") 5 | 6 | 7 | def import_model(model_id, bento_model_tag): 8 | import torch 9 | from transformers import AutoModelForCausalLM, AutoTokenizer 10 | 11 | tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) 12 | model = AutoModelForCausalLM.from_pretrained( 13 | MODEL_ID, 14 | torch_dtype=torch.float16, 15 | low_cpu_mem_usage=True, 16 | ) 17 | 18 | with bentoml.models.create(bento_model_tag) as bento_model_ref: 19 | tokenizer.save_pretrained(bento_model_ref.path) 20 | model.save_pretrained(bento_model_ref.path) 21 | 22 | 23 | if __name__ == "__main__": 24 | import_model(MODEL_ID, BENTO_MODEL_TAG) 25 | -------------------------------------------------------------------------------- /examples/bentoml/requirements.txt: -------------------------------------------------------------------------------- 1 | bentoml>=1.2.11 2 | outlines==0.0.37 3 | transformers==4.38.2 4 | datasets==2.18.0 5 | accelerate==0.27.2 6 | -------------------------------------------------------------------------------- /examples/bentoml/service.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | 3 | import bentoml 4 | from import_model import BENTO_MODEL_TAG 5 | 6 | DEFAULT_SCHEMA = """{ 7 | "title": "Character", 8 | "type": "object", 9 | "properties": { 10 | "name": { 11 | "title": "Name", 12 | "maxLength": 10, 13 | "type": "string" 14 | }, 15 | "age": { 16 | "title": "Age", 17 | "type": "integer" 18 | }, 19 | "armor": {"$ref": "#/definitions/Armor"}, 20 | "weapon": {"$ref": "#/definitions/Weapon"}, 21 | "strength": { 22 | "title": "Strength", 23 | "type": "integer" 24 | } 25 | }, 26 | "required": ["name", "age", "armor", "weapon", "strength"], 27 | "definitions": { 28 | "Armor": { 29 | "title": "Armor", 30 | "description": "An enumeration.", 31 | "enum": ["leather", "chainmail", "plate"], 32 | "type": "string" 33 | }, 34 | "Weapon": { 35 | "title": "Weapon", 36 | "description": "An enumeration.", 37 | "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], 38 | "type": "string" 39 | } 40 | } 41 | }""" 42 | 43 | 44 | @bentoml.service( 45 | traffic={ 46 | "timeout": 300, 47 | }, 48 | resources={ 49 | "gpu": 1, 50 | "gpu_type": "nvidia-l4", 51 | }, 52 | ) 53 | class Outlines: 54 | 
bento_model_ref = bentoml.models.get(BENTO_MODEL_TAG) 55 | 56 | def __init__(self) -> None: 57 | import torch 58 | 59 | import outlines 60 | 61 | self.model = outlines.models.transformers( 62 | self.bento_model_ref.path, 63 | device="cuda", 64 | model_kwargs={"torch_dtype": torch.float16}, 65 | ) 66 | 67 | @bentoml.api 68 | async def generate( 69 | self, 70 | prompt: str = "Give me a character description.", 71 | json_schema: t.Optional[str] = DEFAULT_SCHEMA, 72 | ) -> t.Dict[str, t.Any]: 73 | import outlines 74 | 75 | generator = outlines.generate.json(self.model, json_schema) 76 | character = generator(prompt) 77 | 78 | return character 79 | -------------------------------------------------------------------------------- /examples/cerebrium/cerebrium.toml: -------------------------------------------------------------------------------- 1 | [cerebrium.deployment] 2 | name = "cerebrium" 3 | python_version = "3.11" 4 | cuda_version = "12" 5 | include = "[./*, main.py, cerebrium.toml]" 6 | exclude = "[.*]" 7 | shell_commands = [] 8 | 9 | [cerebrium.hardware] 10 | cpu = 2 11 | memory = 14.0 12 | gpu = "AMPERE A10" 13 | gpu_count = 1 14 | provider = "aws" 15 | region = "us-east-1" 16 | 17 | [cerebrium.scaling] 18 | min_replicas = 0 19 | max_replicas = 5 20 | cooldown = 60 21 | 22 | [cerebrium.dependencies.pip] 23 | outline = "==0.0.37" 24 | transformers = "==4.38.2" 25 | datasets = "==2.18.0" 26 | accelerate = "==0.27.2" 27 | -------------------------------------------------------------------------------- /examples/cerebrium/main.py: -------------------------------------------------------------------------------- 1 | import outlines 2 | 3 | model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2") 4 | 5 | schema = { 6 | "title": "Character", 7 | "type": "object", 8 | "properties": { 9 | "name": {"title": "Name", "maxLength": 10, "type": "string"}, 10 | "age": {"title": "Age", "type": "integer"}, 11 | "armor": {"$ref": "#/definitions/Armor"}, 12 | "weapon": {"$ref": "#/definitions/Weapon"}, 13 | "strength": {"title": "Strength", "type": "integer"}, 14 | }, 15 | "required": ["name", "age", "armor", "weapon", "strength"], 16 | "definitions": { 17 | "Armor": { 18 | "title": "Armor", 19 | "description": "An enumeration.", 20 | "enum": ["leather", "chainmail", "plate"], 21 | "type": "string", 22 | }, 23 | "Weapon": { 24 | "title": "Weapon", 25 | "description": "An enumeration.", 26 | "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], 27 | "type": "string", 28 | }, 29 | }, 30 | } 31 | 32 | generator = outlines.generate.json(model, schema) 33 | 34 | 35 | def generate( 36 | prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", 37 | ): 38 | character = generator( 39 | f"[INST]Give me a character description. Describe {prompt}.[/INST]" 40 | ) 41 | 42 | print(character) 43 | return character 44 | -------------------------------------------------------------------------------- /examples/cfg.py: -------------------------------------------------------------------------------- 1 | import outlines.generate as generate 2 | import outlines.models as models 3 | 4 | nlamb_grammar = r""" 5 | start: sentence 6 | 7 | sentence: noun verb noun -> simple 8 | | noun verb "like" noun -> comparative 9 | 10 | noun: adj? 
NOUN 11 | verb: VERB 12 | adj: ADJ 13 | 14 | NOUN: "flies" | "bananas" | "fruit" 15 | VERB: "like" | "flies" 16 | ADJ: "fruit" 17 | 18 | %import common.WS 19 | %ignore WS 20 | """ 21 | 22 | calc_grammar = r""" 23 | ?start: sum 24 | | NAME "=" sum -> assign_var 25 | 26 | ?sum: product 27 | | sum "+" product -> add 28 | | sum "-" product -> sub 29 | 30 | ?product: atom 31 | | product "*" atom -> mul 32 | | product "/" atom -> div 33 | 34 | ?atom: NUMBER -> number 35 | | "-" atom -> neg 36 | | NAME -> var 37 | | "(" sum ")" 38 | 39 | %import common.LETTER -> NAME 40 | %import common.INT -> NUMBER 41 | %import common.WS_INLINE 42 | 43 | %ignore WS_INLINE 44 | """ 45 | 46 | dyck_grammar = r""" 47 | start: s 48 | s: /a+/ 49 | | "(" s ")" 50 | | "{" s "}" 51 | | "[" s "]" 52 | """ 53 | 54 | json_grammar = r""" 55 | ?start: value 56 | 57 | ?value: object 58 | | array 59 | | string 60 | | SIGNED_NUMBER -> number 61 | | "true" -> true 62 | | "false" -> false 63 | | "null" -> null 64 | 65 | array : "[" [value ("," value)*] "]" 66 | object : "{" [pair ("," pair)*] "}" 67 | pair : string ":" value 68 | 69 | inner: /([^"]|\\\")+/ | 70 | string : "\"" inner "\"" 71 | 72 | %import common.SIGNED_NUMBER 73 | %import common.WS 74 | 75 | %ignore WS 76 | """ 77 | 78 | model = models.transformers("hf-internal-testing/tiny-random-gpt2") 79 | batch_size = 10 80 | for grammar in [nlamb_grammar, calc_grammar, dyck_grammar, json_grammar]: 81 | generator = generate.cfg(model, grammar, max_tokens=model.model.config.n_positions) 82 | sequences = generator([" "] * batch_size) 83 | for seq in sequences: 84 | try: 85 | parse = generator.fsm.parser.parse(seq) 86 | assert parse is not None 87 | print("SUCCESS", seq) 88 | except Exception: # will also fail if goes over max_tokens / context window 89 | print("FAILURE", seq) 90 | -------------------------------------------------------------------------------- /examples/llamacpp_example.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from pydantic import BaseModel, constr 4 | 5 | import outlines 6 | 7 | 8 | class Weapon(str, Enum): 9 | sword = "sword" 10 | axe = "axe" 11 | mace = "mace" 12 | spear = "spear" 13 | bow = "bow" 14 | crossbow = "crossbow" 15 | 16 | 17 | class Armor(str, Enum): 18 | leather = "leather" 19 | chainmail = "chainmail" 20 | plate = "plate" 21 | 22 | 23 | class Character(BaseModel): 24 | name: constr(max_length=10) 25 | age: int 26 | armor: Armor 27 | weapon: Weapon 28 | strength: int 29 | 30 | 31 | if __name__ == "__main__": 32 | # curl -L -o mistral-7b-instruct-v0.2.Q5_K_M.gguf https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf 33 | model = outlines.models.llamacpp("./mistral-7b-instruct-v0.2.Q5_K_M.gguf") 34 | 35 | # Construct structured sequence generator 36 | generator = outlines.generate.json(model, Character) 37 | 38 | # Draw a sample 39 | seed = 789005 40 | 41 | prompt = "Instruct: You are a leading role play gamer. You have seen thousands of different characters and their attributes.\nPlease return a JSON object with common attributes of an RPG character. 
Give me a character description\nOutput:" 42 | 43 | sequence = generator(prompt, seed=seed, max_tokens=512) 44 | print(sequence) 45 | -------------------------------------------------------------------------------- /examples/llamacpp_processor.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from llama_cpp import Llama, LogitsProcessorList 4 | from pydantic import BaseModel, constr 5 | 6 | from outlines.generate.processors import JSONLogitsProcessor 7 | from outlines.models.llamacpp import LlamaCppTokenizer 8 | 9 | 10 | class Weapon(str, Enum): 11 | sword = "sword" 12 | axe = "axe" 13 | mace = "mace" 14 | spear = "spear" 15 | bow = "bow" 16 | crossbow = "crossbow" 17 | 18 | 19 | class Armor(str, Enum): 20 | leather = "leather" 21 | chainmail = "chainmail" 22 | plate = "plate" 23 | 24 | 25 | class Character(BaseModel): 26 | name: constr(max_length=10) 27 | age: int 28 | armor: Armor 29 | weapon: Weapon 30 | strength: int 31 | 32 | 33 | if __name__ == "__main__": 34 | llama = Llama("./phi-2.Q4_K_M.gguf") 35 | tokenizer = LlamaCppTokenizer(llama) 36 | 37 | prompt = "Instruct: You are a leading role play gamer. You have seen thousands of different characters and their attributes.\nPlease return a JSON object with common attributes of an RPG character. Give me a character description\nOutput:" 38 | 39 | logits_processor = JSONLogitsProcessor(Character, tokenizer) 40 | 41 | json_str = llama.create_completion( 42 | prompt, 43 | top_k=40, 44 | top_p=0.95, 45 | temperature=0.7, 46 | max_tokens=100, 47 | logits_processor=LogitsProcessorList([logits_processor]), 48 | )["choices"][0]["text"] 49 | 50 | print(json_str) 51 | -------------------------------------------------------------------------------- /examples/math_generate_code.py: -------------------------------------------------------------------------------- 1 | """Example from https://dust.tt/spolu/a/d12ac33169""" 2 | 3 | import outlines 4 | import outlines.models as models 5 | from outlines import Template 6 | 7 | examples = [ 8 | {"question": "What is 37593 * 67?", "code": "37593 * 67"}, 9 | { 10 | "question": "Janet's ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?", 11 | "code": "(16-3-4)*2", 12 | }, 13 | { 14 | "question": "A robe takes 2 bolts of blue fiber and half that much white fiber. How many bolts in total does it take?", 15 | "code": " 2 + 2/2", 16 | }, 17 | ] 18 | 19 | question = "Carla is downloading a 200 GB file. She can download 2 GB/minute, but 40% of the way through the download, the download fails. Then Carla has to restart the download from the beginning. How load did it take her to download the file in minutes?" 
20 | 21 | answer_with_code_prompt = Template.from_string( 22 | """ 23 | {% for example in examples %} 24 | QUESTION: {{example.question}} 25 | CODE: {{example.code}} 26 | 27 | {% endfor %} 28 | QUESTION: {{question}} 29 | CODE:""" 30 | ) 31 | 32 | 33 | def execute_code(code): 34 | result = eval(code) 35 | return result 36 | 37 | 38 | prompt = answer_with_code_prompt(question, examples) 39 | model = models.openai("gpt-4o-mini") 40 | answer = outlines.generate.text(model)(prompt) 41 | result = execute_code(answer) 42 | print(f"It takes Carla {result:.0f} minutes to download the file.") 43 | -------------------------------------------------------------------------------- /examples/modal_example.py: -------------------------------------------------------------------------------- 1 | import modal 2 | 3 | app = modal.App(name="outlines-app") 4 | 5 | 6 | outlines_image = modal.Image.debian_slim(python_version="3.11").pip_install( 7 | "outlines==0.0.37", 8 | "transformers==4.38.2", 9 | "datasets==2.18.0", 10 | "accelerate==0.27.2", 11 | ) 12 | 13 | 14 | def import_model(): 15 | import outlines 16 | 17 | outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2") 18 | 19 | 20 | outlines_image = outlines_image.run_function(import_model) 21 | 22 | 23 | schema = """{ 24 | "title": "Character", 25 | "type": "object", 26 | "properties": { 27 | "name": { 28 | "title": "Name", 29 | "maxLength": 10, 30 | "type": "string" 31 | }, 32 | "age": { 33 | "title": "Age", 34 | "type": "integer" 35 | }, 36 | "armor": {"$ref": "#/definitions/Armor"}, 37 | "weapon": {"$ref": "#/definitions/Weapon"}, 38 | "strength": { 39 | "title": "Strength", 40 | "type": "integer" 41 | } 42 | }, 43 | "required": ["name", "age", "armor", "weapon", "strength"], 44 | "definitions": { 45 | "Armor": { 46 | "title": "Armor", 47 | "description": "An enumeration.", 48 | "enum": ["leather", "chainmail", "plate"], 49 | "type": "string" 50 | }, 51 | "Weapon": { 52 | "title": "Weapon", 53 | "description": "An enumeration.", 54 | "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], 55 | "type": "string" 56 | } 57 | } 58 | }""" 59 | 60 | 61 | @app.function(image=outlines_image, gpu=modal.gpu.A100(memory=80)) 62 | def generate( 63 | prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", 64 | ): 65 | import outlines 66 | 67 | model = outlines.models.transformers("mistralai/Mistral-7B-v0.1", device="cuda") 68 | 69 | generator = outlines.generate.json(model, schema) 70 | character = generator( 71 | f"[INST]Give me a character description. 
Describe {prompt}.[/INST]" 72 | ) 73 | 74 | print(character) 75 | 76 | 77 | @app.local_entrypoint() 78 | def main( 79 | prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", 80 | ): 81 | generate.remote(prompt) 82 | -------------------------------------------------------------------------------- /examples/parsing.py: -------------------------------------------------------------------------------- 1 | """An example illustrating parser-based masking.""" 2 | 3 | import math 4 | import time 5 | from copy import copy 6 | 7 | import torch 8 | from lark.indenter import DedentError 9 | from lark.lexer import UnexpectedCharacters, UnexpectedToken 10 | from transformers import ( 11 | AutoModelForCausalLM, 12 | AutoTokenizer, 13 | LogitsProcessor, 14 | LogitsProcessorList, 15 | set_seed, 16 | ) 17 | 18 | from outlines.fsm.parsing import PartialLark, PartialPythonIndenter 19 | 20 | revision = None 21 | checkpoint = "Salesforce/codegen-350M-mono" 22 | device = "cuda" 23 | 24 | tokenizer = AutoTokenizer.from_pretrained(checkpoint) 25 | 26 | model = AutoModelForCausalLM.from_pretrained( 27 | checkpoint, trust_remote_code=True, revision=revision 28 | ).to(device) 29 | 30 | parser = PartialLark.open_from_package( 31 | "tests", 32 | "partial_python.lark", 33 | ["text"], 34 | parser="lalr", 35 | postlex=PartialPythonIndenter(), 36 | start="file_input", 37 | ) 38 | 39 | 40 | class ParserLogitsProcessor(LogitsProcessor): 41 | """Bias invalid token scores according to a running parse state.""" 42 | 43 | def __init__(self, parser): 44 | self.parser = parser 45 | self.parser_state = parser.parse("") 46 | self.states_stack = [self.parser_state] 47 | self.token_seq = None 48 | self.token_idx = 0 49 | 50 | def __call__( 51 | self, input_ids: torch.LongTensor, scores: torch.FloatTensor 52 | ) -> torch.FloatTensor: 53 | if self.token_seq is None: 54 | self.token_seq = tokenizer.decode(input_ids[0]) 55 | self.token_idx = len(input_ids[0]) - 1 56 | else: 57 | self.token_idx += 1 58 | self.token_seq += tokenizer.decode(input_ids[0][self.token_idx]) 59 | 60 | # Process the last sampled token 61 | lex_state = self.parser_state.lexer.state 62 | lex_state.text = self.token_seq 63 | 64 | self.parser.parse_from_state(self.parser_state, is_end=False) 65 | 66 | print(f'parsed:"{self.token_seq}"') 67 | 68 | mask = torch.full_like(scores, -math.inf) 69 | 70 | # Determine which tokens in the vocabulary are valid next tokens 71 | # given the parser state. 72 | # 73 | # TODO: This is a very naive and slow approach. It could be done in 74 | # parallel, easily memoized/cached, etc., but there are a few other 75 | # approaches to try first that will dramatically reduce the 76 | # amount of work needed here. 
77 | t0 = time.perf_counter() 78 | for test_token, token_id in tokenizer.vocab.items(): 79 | ps = copy(self.parser_state) 80 | ls = ps.lexer.state 81 | ls.text = self.token_seq + tokenizer.convert_tokens_to_string([test_token]) 82 | 83 | try: 84 | self.parser.parse_from_state(ps, is_end=False) 85 | mask[0][token_id] = 0 86 | except (EOFError, UnexpectedToken, UnexpectedCharacters, DedentError): 87 | pass 88 | 89 | print(f"next token masking duration: {time.perf_counter() - t0}") 90 | 91 | return scores + mask 92 | 93 | 94 | set_seed(20399) 95 | 96 | input_text = "def " 97 | inputs = tokenizer.encode(input_text, return_tensors="pt").to(device) 98 | 99 | outputs = model.generate( 100 | inputs, 101 | max_length=100, 102 | temperature=0.1, 103 | logits_processor=LogitsProcessorList([ParserLogitsProcessor(parser)]), 104 | renormalize_logits=True, 105 | ) 106 | 107 | print(tokenizer.decode(outputs[0])) 108 | -------------------------------------------------------------------------------- /examples/pick_odd_one_out.py: -------------------------------------------------------------------------------- 1 | """Chain-of-thought prompting for Odd one out classification. 2 | 3 | Example taken from the LQML library [1]_. 4 | 5 | References 6 | ---------- 7 | .. [1] Beurer-Kellner, L., Fischer, M., & Vechev, M. (2022). 8 | Prompting Is Programming: A Query Language For Large Language Models. 9 | arXiv preprint arXiv:2212.06094. 10 | 11 | """ 12 | 13 | import outlines 14 | import outlines.models as models 15 | 16 | 17 | build_ooo_prompt = outlines.Template.from_string( 18 | """ 19 | Pick the odd word out: skirt, dress, pen, jacket. 20 | skirt is clothing, dress is clothing, pen is an object, jacket is clothing. 21 | So the odd one is pen. 22 | 23 | Pick the odd word out: Spain, France, German, England, Singapore. 24 | Spain is a country, France is a country, German is a language, ... 25 | So the odd one is German. 26 | 27 | Pick the odd word out: {{ options | join(", ") }}. 28 | 29 | """ 30 | ) 31 | 32 | options = ["sea", "mountains", "plains", "sock"] 33 | 34 | model = models.openai("gpt-4o-mini") 35 | gen_text = outlines.generate.text(model) 36 | gen_choice = outlines.generate.choice(model, options) 37 | 38 | prompt = build_ooo_prompt(options) 39 | reasoning = gen_text(prompt, stop_at=["Pick the odd word", "So the odd one"]) 40 | prompt += reasoning 41 | result = gen_choice(prompt) 42 | prompt += result 43 | print(result) 44 | -------------------------------------------------------------------------------- /examples/react.py: -------------------------------------------------------------------------------- 1 | """ReAct 2 | 3 | This example was inspired by the LQML library [1]_. The ReAct framework was 4 | first developed in [2]_ and augments Chain-of-Thought prompting with the ability 5 | for the model to query external sources. 6 | 7 | References 8 | ---------- 9 | .. [1] Beurer-Kellner, L., Fischer, M., & Vechev, M. (2022). Prompting Is Programming: A Query Language For Large Language Models. arXiv preprint arXiv:2212.06094. 10 | .. [2] Yao, S., Zhao, J., Yu, D., Du, N., Shafran, I., Narasimhan, K., & Cao, Y. (2022). React: Synergizing reasoning and acting in language models. arXiv preprint arXiv:2210.03629. 
11 | 12 | """ 13 | 14 | import requests # type: ignore 15 | 16 | import outlines 17 | from outlines import Template 18 | import outlines.generate as generate 19 | import outlines.models as models 20 | 21 | 22 | build_reAct_prompt = Template.from_string( 23 | """What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into? 24 | Tho 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado ... 25 | Act 2: Search 'Colorado orogeny' 26 | Obs 2: The Colorado orogeny was an episode of mountain building (an orogeny) ... 27 | Tho 3: It does not mention the eastern sector. So I need to look up eastern sector. 28 | ... 29 | Tho 4: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft. 30 | Act 5: Finish '1,800 to 7,000 ft' 31 | {{ question }} 32 | """ 33 | ) 34 | 35 | 36 | add_mode = Template.from_string( 37 | """{{ prompt }} 38 | {{ mode }} {{ i }}: {{ result }} 39 | """ 40 | ) 41 | 42 | 43 | def search_wikipedia(query: str): 44 | url = f"https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles={query}&origin=*" 45 | response = requests.get(url) 46 | page = response.json()["query"]["pages"] 47 | return ".".join(list(page.values())[0]["extract"].split(".")[:2]) 48 | 49 | 50 | prompt = build_reAct_prompt("Where is Apple Computers headquarted? ") 51 | model = models.openai("gpt-4o-mini") 52 | 53 | mode_generator = generate.choice(model, choices=["Tho", "Act"]) 54 | action_generator = generate.choice(model, choices=["Search", "Finish"]) 55 | text_generator = generate.text(model) 56 | 57 | for i in range(1, 10): 58 | mode = mode_generator(prompt, max_tokens=128) 59 | prompt = add_mode(i, mode, "", prompt) 60 | 61 | if mode == "Tho": 62 | thought = text_generator(prompt, stop_at="\n", max_tokens=128) 63 | prompt += f"{thought}" 64 | elif mode == "Act": 65 | action = action_generator(prompt, max_tokens=128) 66 | prompt += f"{action} '" 67 | 68 | subject = text_generator(prompt, stop_at=["'"], max_tokens=128) 69 | # Apple Computers headquartered 70 | subject = " ".join(subject.split()[:2]) 71 | prompt += f"{subject}'" 72 | 73 | if action == "Search": 74 | result = search_wikipedia(subject) 75 | prompt = add_mode(i, "Obs", result, prompt) 76 | else: 77 | break 78 | 79 | print(prompt) 80 | -------------------------------------------------------------------------------- /examples/self_consistency.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import numpy as np 4 | 5 | import outlines 6 | import outlines.models as models 7 | from outlines import Template 8 | 9 | examples = [ 10 | { 11 | "question": "There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?", 12 | "answer": "We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.", 13 | }, 14 | { 15 | "question": "If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?", 16 | "answer": "There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.", 17 | }, 18 | { 19 | "question": "Leah had 32 chocolates and her sister had 42. 
If they ate 35, how many pieces do they have left in total?", 20 | "answer": "Leah had 32 chocolates and Leah’s sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.", 21 | }, 22 | { 23 | "question": "Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?", 24 | "answer": "Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.", 25 | }, 26 | { 27 | "question": "Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?", 28 | "answer": "He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.", 29 | }, 30 | { 31 | "question": "There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?", 32 | "answer": "There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.", 33 | }, 34 | { 35 | "question": "Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?", 36 | "answer": "Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.", 37 | }, 38 | { 39 | "question": "Olivia has $23. She bought five bagels for $3 each. How much money does she have left?", 40 | "answer": "She bought 5 bagels for $3 each. This means she spent 5", 41 | }, 42 | ] 43 | 44 | question = "When I was 6 my sister was half my age. Now I’m 70 how old is my sister?" 
45 | 46 | 47 | few_shots = Template.from_string( 48 | """ 49 | {% for example in examples %} 50 | Q: {{ example.question }} 51 | A: {{ example.answer }} 52 | {% endfor %} 53 | Q: {{ question }} 54 | A: 55 | """ 56 | ) 57 | 58 | model = models.openai("gpt-4o-mini") 59 | generator = outlines.generate.text(model) 60 | prompt = few_shots(question, examples) 61 | answers = generator(prompt, samples=10) 62 | 63 | digits = [] 64 | for answer in answers: 65 | try: 66 | match = re.findall(r"\d+", answer)[-1] 67 | if match is not None: 68 | digit = int(match) 69 | digits.append(digit) 70 | except AttributeError: 71 | print(f"Could not parse the completion: '{answer}'") 72 | 73 | unique_digits, counts = np.unique(digits, return_counts=True) 74 | results = {d: c for d, c in zip(unique_digits, counts)} 75 | print(results) 76 | 77 | max_count = max(results.values()) 78 | answer_value = [key for key, value in results.items() if value == max_count][0] 79 | total_count = sum(results.values()) 80 | print( 81 | f"The most likely answer is {answer_value} ({max_count / total_count * 100}% consensus)" 82 | ) 83 | -------------------------------------------------------------------------------- /examples/transformers_integration.py: -------------------------------------------------------------------------------- 1 | """Example of integrating `outlines` with `transformers`.""" 2 | 3 | from pydantic import BaseModel 4 | from transformers import pipeline 5 | 6 | from outlines.integrations.transformers import JSONPrefixAllowedTokens 7 | 8 | 9 | class Person(BaseModel): 10 | first_name: str 11 | surname: str 12 | 13 | 14 | pipe = pipeline("text-generation", model="mistralai/Mistral-7B-v0.1") 15 | prefix_allowed_tokens_fn = JSONPrefixAllowedTokens( 16 | schema=Person, tokenizer_or_pipe=pipe, whitespace_pattern=r" ?" 17 | ) 18 | results = pipe( 19 | ["He is Tom Jones", "She saw Linda Smith"], 20 | return_full_text=False, 21 | do_sample=False, 22 | max_new_tokens=50, 23 | prefix_allowed_tokens_fn=prefix_allowed_tokens_fn, 24 | ) 25 | print(results) 26 | -------------------------------------------------------------------------------- /examples/vllm_integration.py: -------------------------------------------------------------------------------- 1 | """Example of integrating `outlines` with `vllm`.""" 2 | 3 | import vllm 4 | from pydantic import BaseModel 5 | from transformers import AutoTokenizer 6 | 7 | from outlines.models.vllm import adapt_tokenizer 8 | from outlines.processors import JSONLogitsProcessor 9 | 10 | 11 | class Person(BaseModel): 12 | first_name: str 13 | surname: str 14 | 15 | 16 | MODEL_ID = "mistralai/Mistral-7B-v0.1" 17 | llm = vllm.LLM(model=MODEL_ID, max_model_len=512) 18 | tokenizer = adapt_tokenizer(AutoTokenizer.from_pretrained(MODEL_ID)) 19 | logits_processor = JSONLogitsProcessor( 20 | schema=Person, tokenizer=tokenizer, whitespace_pattern=r" ?" 
21 | ) 22 | result = llm.generate( 23 | ["He is Tom Jones", "She saw Linda Smith"], 24 | sampling_params=vllm.SamplingParams( 25 | temperature=0.0, 26 | max_tokens=50, 27 | logits_processors=[logits_processor], 28 | ), 29 | ) 30 | print(result) 31 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "inputs": { 5 | "systems": "systems" 6 | }, 7 | "locked": { 8 | "lastModified": 1731533236, 9 | "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", 10 | "owner": "numtide", 11 | "repo": "flake-utils", 12 | "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", 13 | "type": "github" 14 | }, 15 | "original": { 16 | "owner": "numtide", 17 | "repo": "flake-utils", 18 | "type": "github" 19 | } 20 | }, 21 | "nixpkgs": { 22 | "locked": { 23 | "lastModified": 1738136902, 24 | "narHash": "sha256-pUvLijVGARw4u793APze3j6mU1Zwdtz7hGkGGkD87qw=", 25 | "owner": "NixOS", 26 | "repo": "nixpkgs", 27 | "rev": "9a5db3142ce450045840cc8d832b13b8a2018e0c", 28 | "type": "github" 29 | }, 30 | "original": { 31 | "id": "nixpkgs", 32 | "type": "indirect" 33 | } 34 | }, 35 | "root": { 36 | "inputs": { 37 | "flake-utils": "flake-utils", 38 | "nixpkgs": "nixpkgs" 39 | } 40 | }, 41 | "systems": { 42 | "locked": { 43 | "lastModified": 1681028828, 44 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 45 | "owner": "nix-systems", 46 | "repo": "default", 47 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 48 | "type": "github" 49 | }, 50 | "original": { 51 | "owner": "nix-systems", 52 | "repo": "default", 53 | "type": "github" 54 | } 55 | } 56 | }, 57 | "root": "root", 58 | "version": 7 59 | } 60 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | inputs.flake-utils.url = "github:numtide/flake-utils"; 3 | outputs = { self, nixpkgs, flake-utils }: 4 | flake-utils.lib.eachDefaultSystem (system: 5 | let 6 | pkgs = import nixpkgs { 7 | inherit system; 8 | config.allowUnfree = true; 9 | }; 10 | in { devShells.default = import ./shell.nix { inherit pkgs; }; }); 11 | } 12 | -------------------------------------------------------------------------------- /outlines/__init__.py: -------------------------------------------------------------------------------- 1 | """Outlines is a Generative Model Programming Framework.""" 2 | 3 | import outlines.generate 4 | import outlines.grammars 5 | import outlines.models 6 | import outlines.processors 7 | import outlines.types 8 | from outlines.base import vectorize 9 | from outlines.caching import clear_cache, disable_cache, get_cache 10 | from outlines.function import Function 11 | from outlines.templates import Template, prompt 12 | 13 | __all__ = [ 14 | "clear_cache", 15 | "disable_cache", 16 | "get_cache", 17 | "Function", 18 | "prompt", 19 | "Prompt", 20 | "vectorize", 21 | "grammars", 22 | ] 23 | -------------------------------------------------------------------------------- /outlines/fsm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/outlines/fsm/__init__.py -------------------------------------------------------------------------------- /outlines/fsm/json_schema.py: 
-------------------------------------------------------------------------------- 1 | import inspect 2 | import json 3 | import warnings 4 | from enum import Enum 5 | from typing import Callable, Type, Union 6 | 7 | from pydantic import BaseModel, create_model 8 | 9 | 10 | def convert_json_schema_to_str(json_schema: Union[dict, str, Type[BaseModel]]) -> str: 11 | """Convert a JSON schema to a string. 12 | 13 | Parameters 14 | ---------- 15 | json_schema 16 | The JSON schema. 17 | 18 | Returns 19 | ------- 20 | str 21 | The JSON schema converted to a string. 22 | 23 | Raises 24 | ------ 25 | ValueError 26 | If the schema is not a dictionary, a string or a Pydantic class. 27 | """ 28 | if isinstance(json_schema, dict): 29 | schema_str = json.dumps(json_schema) 30 | elif isinstance(json_schema, str): 31 | schema_str = json_schema 32 | elif issubclass(json_schema, BaseModel): 33 | schema_str = json.dumps(json_schema.model_json_schema()) 34 | else: 35 | raise ValueError( 36 | f"Cannot parse schema {json_schema}. The schema must be either " 37 | + "a Pydantic class, a dictionary or a string that contains the JSON " 38 | + "schema specification" 39 | ) 40 | return schema_str 41 | 42 | 43 | def get_schema_from_signature(fn: Callable) -> dict: 44 | """Turn a function signature into a JSON schema. 45 | 46 | Every JSON object valid to the output JSON Schema can be passed 47 | to `fn` using the ** unpacking syntax. 48 | 49 | """ 50 | signature = inspect.signature(fn) 51 | arguments = {} 52 | for name, arg in signature.parameters.items(): 53 | if arg.annotation == inspect._empty: 54 | raise ValueError("Each argument must have a type annotation") 55 | else: 56 | arguments[name] = (arg.annotation, ...) 57 | 58 | try: 59 | fn_name = fn.__name__ 60 | except Exception as e: 61 | fn_name = "Arguments" 62 | warnings.warn( 63 | f"The function name could not be determined. Using default name 'Arguments' instead. For debugging, here is exact error:\n{e}", 64 | category=UserWarning, 65 | ) 66 | model = create_model(fn_name, **arguments) 67 | 68 | return model.model_json_schema() 69 | 70 | 71 | def get_schema_from_enum(myenum: type[Enum]) -> dict: 72 | if len(myenum) == 0: 73 | raise ValueError( 74 | f"Your enum class {myenum.__name__} has 0 members. If you are working with an enum of functions, do not forget to register them as callable (using `partial` for instance)" 75 | ) 76 | choices = [ 77 | get_schema_from_signature(elt.value.func) 78 | if callable(elt.value) 79 | else {"const": elt.value} 80 | for elt in myenum 81 | ] 82 | schema = {"title": myenum.__name__, "oneOf": choices} 83 | return schema 84 | -------------------------------------------------------------------------------- /outlines/fsm/types.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from enum import EnumMeta 3 | from typing import Any, Protocol, Tuple, Type 4 | 5 | from outlines.types import Regex, boolean as boolean_regex, date as date_regex 6 | from outlines.types import datetime as datetime_regex 7 | from outlines.types import ( 8 | integer as integer_regex, 9 | number as number_regex, 10 | time as time_regex, 11 | ) 12 | 13 | 14 | class FormatFunction(Protocol): 15 | def __call__(self, sequence: str) -> Any: ... 
16 | 17 | 18 | def python_types_to_regex(python_type: Type) -> Tuple[Regex, FormatFunction]: 19 | # If it is a custom type 20 | if isinstance(python_type, Regex): 21 | custom_regex_str = python_type.pattern 22 | 23 | def custom_format_fn(sequence: str) -> str: 24 | return str(sequence) 25 | 26 | return Regex(custom_regex_str), custom_format_fn 27 | 28 | if isinstance(python_type, EnumMeta): 29 | values = python_type.__members__.keys() 30 | enum_regex_str: str = "(" + "|".join(values) + ")" 31 | 32 | def enum_format_fn(sequence: str) -> str: 33 | return str(sequence) 34 | 35 | return Regex(enum_regex_str), enum_format_fn 36 | 37 | if python_type is float: 38 | 39 | def float_format_fn(sequence: str) -> float: 40 | return float(sequence) 41 | 42 | return number_regex, float_format_fn 43 | elif python_type is int: 44 | 45 | def int_format_fn(sequence: str) -> int: 46 | return int(sequence) 47 | 48 | return integer_regex, int_format_fn 49 | elif python_type is bool: 50 | 51 | def bool_format_fn(sequence: str) -> bool: 52 | return bool(sequence) 53 | 54 | return boolean_regex, bool_format_fn 55 | elif python_type == datetime.date: 56 | 57 | def date_format_fn(sequence: str) -> datetime.date: 58 | return datetime.datetime.strptime(sequence, "%Y-%m-%d").date() 59 | 60 | return date_regex, date_format_fn 61 | elif python_type == datetime.time: 62 | 63 | def time_format_fn(sequence: str) -> datetime.time: 64 | return datetime.datetime.strptime(sequence, "%H:%M:%S").time() 65 | 66 | return time_regex, time_format_fn 67 | elif python_type == datetime.datetime: 68 | 69 | def datetime_format_fn(sequence: str) -> datetime.datetime: 70 | return datetime.datetime.strptime(sequence, "%Y-%m-%d %H:%M:%S") 71 | 72 | return datetime_regex, datetime_format_fn 73 | else: 74 | raise NotImplementedError( 75 | f"The Python type {python_type} is not supported. Please open an issue." 76 | ) 77 | -------------------------------------------------------------------------------- /outlines/function.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | from dataclasses import dataclass 3 | from typing import TYPE_CHECKING, Callable, Optional, Tuple, Union 4 | 5 | import requests 6 | 7 | from outlines import generate, models 8 | 9 | if TYPE_CHECKING: 10 | from outlines.generate.api import SequenceGenerator 11 | from outlines.templates import Template 12 | 13 | 14 | @dataclass 15 | class Function: 16 | """Represents an Outlines function. 17 | 18 | Functions are a convenient way to encapsulate a prompt template, a language 19 | model and a Pydantic model that define the output structure. Once defined, 20 | the function can be called with arguments that will be used to render the 21 | prompt template. 22 | 23 | """ 24 | 25 | prompt_template: "Template" 26 | schema: Union[str, Callable, object] 27 | model_name: str 28 | generator: Optional["SequenceGenerator"] = None 29 | 30 | @classmethod 31 | def from_github(cls, program_path: str, function_name: str = "fn"): 32 | """Load a function stored on GitHub""" 33 | program_content = download_from_github(program_path) 34 | function = extract_function_from_file(program_content, function_name) 35 | 36 | return function 37 | 38 | def init_generator(self): 39 | """Load the model and initialize the generator.""" 40 | model = models.transformers(self.model_name) 41 | self.generator = generate.json(model, self.schema) 42 | 43 | def __call__(self, *args, **kwargs): 44 | """Call the function. 45 | 46 | .. 
warning:: 47 | 48 | This currently does not support batching. 49 | 50 | Parameters 51 | ---------- 52 | args 53 | Values to pass to the prompt template as positional arguments. 54 | kwargs 55 | Values to pass to the prompt template as keyword arguments. 56 | 57 | """ 58 | if self.generator is None: 59 | self.init_generator() 60 | 61 | prompt = self.prompt_template(*args, **kwargs) 62 | return self.generator(prompt) 63 | 64 | 65 | def download_from_github(short_path: str): 66 | """Download the file in which the function is stored on GitHub.""" 67 | GITHUB_BASE_URL = "https://raw.githubusercontent.com" 68 | BRANCH = "main" 69 | 70 | path = short_path.split("/") 71 | if len(path) < 3: 72 | raise ValueError( 73 | "Please provide a valid path in the form {USERNAME}/{REPO_NAME}/{PATH_TO_FILE}." 74 | ) 75 | elif short_path[-3:] == ".py": 76 | raise ValueError("Do not append the `.py` extension to the program name.") 77 | 78 | username = path[0] 79 | repo = path[1] 80 | path_to_file = path[2:] 81 | 82 | url = "/".join([GITHUB_BASE_URL, username, repo, BRANCH] + path_to_file) + ".py" 83 | result = requests.get(url) 84 | 85 | if result.status_code == 200: 86 | return result.text 87 | elif result.status_code == 404: 88 | raise ValueError( 89 | f"Program could not be found at {url}. Please make sure you entered the GitHub username, repository name and path to the program correctly." 90 | ) 91 | else: 92 | result.raise_for_status() 93 | 94 | 95 | def extract_function_from_file(content: str, function_name: str) -> Tuple[Callable]: 96 | """Extract a function object from a downloaded file.""" 97 | 98 | spec = importlib.util.spec_from_loader( 99 | "outlines_function", loader=None, origin="github" 100 | ) 101 | if spec is not None: 102 | module = importlib.util.module_from_spec(spec) 103 | exec(content, module.__dict__) 104 | 105 | try: 106 | fn = getattr(module, function_name) 107 | except AttributeError: 108 | raise AttributeError( 109 | "Could not find an `outlines.Function` instance in the remote file. Make sure that the path you specified is correct." 110 | ) 111 | 112 | if not isinstance(fn, module.outlines.Function): 113 | raise TypeError( 114 | f"The `{function_name}` variable in the program must be an instance of `outlines.Function`" 115 | ) 116 | 117 | return fn 118 | -------------------------------------------------------------------------------- /outlines/generate/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import SequenceGenerator 2 | from .cfg import cfg 3 | from .choice import choice 4 | from .format import format 5 | from .fsm import fsm 6 | from .json import json 7 | from .regex import regex 8 | from .text import text 9 | -------------------------------------------------------------------------------- /outlines/generate/cfg.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | from outlines.generate.api import ( 4 | SequenceGeneratorAdapter, 5 | VisionSequenceGeneratorAdapter, 6 | ) 7 | from outlines.models import LlamaCpp, OpenAI, TransformersVision 8 | from outlines.samplers import Sampler, multinomial 9 | 10 | 11 | @singledispatch 12 | def cfg( 13 | model, cfg_str: str, sampler: Sampler = multinomial() 14 | ) -> SequenceGeneratorAdapter: 15 | """Generate text in the language of a Context-Free Grammar 16 | 17 | Arguments 18 | --------- 19 | model: 20 | An `outlines.model` instance. 
21 | sampler: 22 | The sampling algorithm to use to generate token ids from the logits 23 | distribution. 24 | 25 | Returns 26 | ------- 27 | A `SequenceGeneratorAdapter` instance that generates text. 28 | 29 | """ 30 | from outlines.processors import CFGLogitsProcessor 31 | 32 | logits_processor = CFGLogitsProcessor(cfg_str, tokenizer=model.tokenizer) 33 | return SequenceGeneratorAdapter(model, logits_processor, sampler) 34 | 35 | 36 | @cfg.register(TransformersVision) 37 | def cfg_vision(model, cfg_str: str, sampler: Sampler = multinomial()): 38 | from outlines.processors import CFGLogitsProcessor 39 | 40 | logits_processor = CFGLogitsProcessor(cfg_str, tokenizer=model.tokenizer) 41 | return VisionSequenceGeneratorAdapter(model, logits_processor, sampler) 42 | 43 | 44 | @cfg.register(LlamaCpp) 45 | def cfg_llamacpp(model, cfg_str: str, sampler: Sampler = multinomial()): 46 | raise NotImplementedError("Not yet available due to bug in llama_cpp tokenizer") 47 | 48 | 49 | @cfg.register(OpenAI) 50 | def cfg_openai(model, cfg_str: str, sampler: Sampler = multinomial()): 51 | raise NotImplementedError( 52 | "Cannot use grammar-structured generation with an OpenAI model" 53 | + "due to the limitations of the OpenAI API." 54 | ) 55 | -------------------------------------------------------------------------------- /outlines/generate/choice.py: -------------------------------------------------------------------------------- 1 | import json as pyjson 2 | import re 3 | from enum import Enum 4 | from functools import singledispatch 5 | from typing import Callable, List, Union 6 | 7 | from outlines_core.fsm.json_schema import build_regex_from_schema 8 | 9 | from outlines.fsm.json_schema import get_schema_from_enum 10 | from outlines.generate.api import SequenceGeneratorAdapter 11 | from outlines.models import OpenAI 12 | from outlines.samplers import Sampler, multinomial 13 | 14 | from .json import json 15 | from .regex import regex 16 | 17 | 18 | @singledispatch 19 | def choice( 20 | model, choices: Union[List[str], type[Enum]], sampler: Sampler = multinomial() 21 | ) -> SequenceGeneratorAdapter: 22 | if isinstance(choices, type(Enum)): 23 | regex_str = build_regex_from_schema(pyjson.dumps(get_schema_from_enum(choices))) 24 | else: 25 | choices = [re.escape(choice) for choice in choices] # type: ignore 26 | regex_str = r"(" + r"|".join(choices) + r")" 27 | 28 | generator = regex(model, regex_str, sampler) 29 | if isinstance(choices, type(Enum)): 30 | generator.format_sequence = lambda x: pyjson.loads(x) 31 | else: 32 | generator.format_sequence = lambda x: x 33 | 34 | return generator 35 | 36 | 37 | @choice.register(OpenAI) 38 | def choice_openai( 39 | model: OpenAI, choices: List[str], sampler: Sampler = multinomial() 40 | ) -> Callable: 41 | """ 42 | Call OpenAI API with response_format of a dict: 43 | {"result": } 44 | """ 45 | 46 | choices_schema = pyjson.dumps( 47 | { 48 | "type": "object", 49 | "properties": {"result": {"type": "string", "enum": choices}}, 50 | "additionalProperties": False, 51 | "required": ["result"], 52 | } 53 | ) 54 | generator = json(model, choices_schema, sampler) 55 | 56 | def generate_choice(*args, **kwargs): 57 | return generator(*args, **kwargs)["result"] 58 | 59 | return generate_choice 60 | -------------------------------------------------------------------------------- /outlines/generate/format.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | from outlines.fsm.types import 
python_types_to_regex 4 | from outlines.generate.api import SequenceGeneratorAdapter 5 | from outlines.models import OpenAI 6 | from outlines.samplers import Sampler, multinomial 7 | 8 | from .regex import regex 9 | 10 | 11 | @singledispatch 12 | def format( 13 | model, python_type, sampler: Sampler = multinomial() 14 | ) -> SequenceGeneratorAdapter: 15 | """Generate structured data that can be parsed as a Python type. 16 | 17 | Parameters 18 | ---------- 19 | model: 20 | An instance of `Transformer` that represents a model from the 21 | `transformers` library. 22 | python_type: 23 | A Python type. The output of the generator must be parseable into 24 | this type. 25 | sampler: 26 | The sampling algorithm to use to generate token ids from the logits 27 | distribution. 28 | 29 | Returns 30 | ------- 31 | A `SequenceGenerator` instance that generates text constrained by the Python type 32 | and translates this text into the corresponding type. 33 | 34 | """ 35 | regex_str, format_fn = python_types_to_regex(python_type) 36 | regex_str = regex_str.pattern 37 | generator = regex(model, regex_str, sampler) 38 | generator.format_sequence = format_fn 39 | 40 | return generator 41 | 42 | 43 | @format.register(OpenAI) 44 | def format_openai(model, python_type, sampler: Sampler = multinomial()): 45 | raise NotImplementedError( 46 | "Cannot use Python type-structured generation with an OpenAI model" 47 | + " due to the limitations of the OpenAI API." 48 | ) 49 | -------------------------------------------------------------------------------- /outlines/generate/fsm.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | import interegular 4 | 5 | from outlines.fsm.guide import RegexGuide 6 | from outlines.generate.api import ( 7 | SequenceGeneratorAdapter, 8 | VisionSequenceGeneratorAdapter, 9 | ) 10 | from outlines.models import TransformersVision 11 | from outlines.samplers import Sampler, multinomial 12 | 13 | 14 | @singledispatch 15 | def fsm( 16 | model, fsm: interegular.fsm.FSM, sampler: Sampler = multinomial() 17 | ) -> SequenceGeneratorAdapter: 18 | from outlines.processors import GuideLogitsProcessor 19 | 20 | guide = RegexGuide.from_interegular_fsm(fsm, model.tokenizer) 21 | logits_processor = GuideLogitsProcessor(tokenizer=model.tokenizer, guide=guide) 22 | return SequenceGeneratorAdapter(model, logits_processor, sampler) 23 | 24 | 25 | @fsm.register(TransformersVision) 26 | def fsm_vision(model, fsm: interegular.fsm.FSM, sampler: Sampler = multinomial()): 27 | from outlines.processors import GuideLogitsProcessor 28 | 29 | guide = RegexGuide.from_interegular_fsm(fsm, model.tokenizer) 30 | logits_processor = GuideLogitsProcessor(tokenizer=model.tokenizer, guide=guide) 31 | return VisionSequenceGeneratorAdapter(model, logits_processor, sampler) 32 | -------------------------------------------------------------------------------- /outlines/generate/regex.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | from outlines.generate.api import ( 4 | SequenceGeneratorAdapter, 5 | VisionSequenceGeneratorAdapter, 6 | ) 7 | from outlines.models import OpenAI, TransformersVision 8 | from outlines.samplers import Sampler, multinomial 9 | from outlines.types import Regex 10 | 11 | 12 | @singledispatch 13 | def regex(model, regex_str: str | Regex, sampler: Sampler = multinomial()): 14 | """Generate structured text in the language of a regular 
expression. 15 | 16 | Parameters 17 | ---------- 18 | model: 19 | An instance of `Transformer` that represents a model from the 20 | `transformers` library. 21 | regex_str: 22 | The regular expression that the output must follow. 23 | sampler: 24 | The sampling algorithm to use to generate token ids from the logits 25 | distribution. 26 | 27 | Returns 28 | ------- 29 | A `SequenceGeneratorAdapter` instance that generates text constrained by the 30 | regular expression. 31 | 32 | """ 33 | from outlines.processors import RegexLogitsProcessor 34 | 35 | if isinstance(regex_str, Regex): 36 | regex_str = regex_str.pattern 37 | 38 | logits_processor = RegexLogitsProcessor(regex_str, tokenizer=model.tokenizer) 39 | return SequenceGeneratorAdapter(model, logits_processor, sampler) 40 | 41 | 42 | @regex.register(TransformersVision) 43 | def regex_vision( 44 | model, 45 | regex_str: str | Regex, 46 | sampler: Sampler = multinomial(), 47 | ): 48 | from outlines.processors import RegexLogitsProcessor 49 | 50 | if isinstance(regex_str, Regex): 51 | regex_str = regex_str.pattern 52 | 53 | logits_processor = RegexLogitsProcessor(regex_str, tokenizer=model.tokenizer) 54 | return VisionSequenceGeneratorAdapter(model, logits_processor, sampler) 55 | 56 | 57 | @regex.register(OpenAI) 58 | def regex_openai( 59 | model: OpenAI, 60 | regex_str: str, 61 | sampler: Sampler = multinomial(), 62 | ): 63 | raise NotImplementedError( 64 | "Cannot use regex-structured generation with an OpenAI model" 65 | + "due to the limitations of the OpenAI API." 66 | ) 67 | -------------------------------------------------------------------------------- /outlines/generate/text.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | from outlines.generate.api import ( 4 | SequenceGeneratorAdapter, 5 | VisionSequenceGeneratorAdapter, 6 | ) 7 | from outlines.models import OpenAI, TransformersVision 8 | from outlines.samplers import Sampler, multinomial 9 | 10 | 11 | @singledispatch 12 | def text(model, sampler: Sampler = multinomial()) -> SequenceGeneratorAdapter: 13 | """Generate text with a `Transformer` model. 14 | 15 | Note 16 | ---- 17 | Python 3.11 allows dispatching on Union types and 18 | this should greatly simplify the code. 19 | 20 | Arguments 21 | --------- 22 | model: 23 | An instance of `Transformer` that represents a model from the 24 | `transformers` library. 25 | sampler: 26 | The sampling algorithm to use to generate token ids from the logits 27 | distribution. 28 | 29 | Returns 30 | ------- 31 | A `SequenceGeneratorAdapter` instance that generates text. 32 | 33 | """ 34 | return SequenceGeneratorAdapter(model, None, sampler) 35 | 36 | 37 | @text.register(TransformersVision) 38 | def text_vision(model, sampler: Sampler = multinomial()): 39 | return VisionSequenceGeneratorAdapter(model, None, sampler) 40 | 41 | 42 | @text.register(OpenAI) 43 | def text_openai(model: OpenAI, sampler: Sampler = multinomial()) -> OpenAI: 44 | if not isinstance(sampler, multinomial): 45 | raise NotImplementedError( 46 | r"The OpenAI API does not support any other sampling algorithm " 47 | + "than the multinomial sampler." 
48 | ) 49 | 50 | return model 51 | -------------------------------------------------------------------------------- /outlines/grammars.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | GRAMMAR_PATH = Path(__file__).parent / "grammars" 4 | 5 | 6 | def read_grammar(grammar_file_name, base_grammar_path=GRAMMAR_PATH): 7 | """Read grammar file from default grammar path""" 8 | full_path = base_grammar_path / grammar_file_name 9 | with open(full_path) as file: 10 | return file.read() 11 | 12 | 13 | arithmetic = read_grammar("arithmetic.lark") 14 | json = read_grammar("json.lark") 15 | -------------------------------------------------------------------------------- /outlines/grammars/arithmetic.lark: -------------------------------------------------------------------------------- 1 | ?start: sum 2 | 3 | ?sum: product 4 | | sum "+" product -> add 5 | | sum "-" product -> sub 6 | 7 | ?product: atom 8 | | product "*" atom -> mul 9 | | product "/" atom -> div 10 | 11 | ?atom: NUMBER -> number 12 | | "-" atom -> neg 13 | | "(" sum ")" 14 | 15 | %import common.NUMBER 16 | %import common.WS_INLINE 17 | 18 | %ignore WS_INLINE 19 | -------------------------------------------------------------------------------- /outlines/grammars/common.lark: -------------------------------------------------------------------------------- 1 | // Adapted from https://github.com/lark-parser/lark/blob/master/lark/grammars/common.lark 2 | 3 | // Lark License: 4 | // Copyright © 2017 Erez Shinan 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | // this software and associated documentation files (the "Software"), to deal in 8 | // the Software without restriction, including without limitation the rights to 9 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | // the Software, and to permit persons to whom the Software is furnished to do so, 11 | // subject to the following conditions: 12 | // 13 | // The above copyright notice and this permission notice shall be included in all 14 | // copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | // Basic terminals for common use 25 | 26 | 27 | // 28 | // Numbers 29 | // 30 | 31 | DIGIT: "0".."9" 32 | HEXDIGIT: "a".."f"|"A".."F"|DIGIT 33 | 34 | INT: DIGIT+ 35 | SIGNED_INT: ["+"|"-"] INT 36 | DECIMAL: INT "." INT? | "." INT 37 | 38 | // float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/ 39 | _EXP: ("e"|"E") SIGNED_INT 40 | FLOAT: INT _EXP | DECIMAL _EXP? 
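// e.g. DECIMAL matches "3.14" and ".5"; FLOAT additionally matches exponent forms such as "1e3" and ".5e-2"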
41 | SIGNED_FLOAT: ["+"|"-"] FLOAT 42 | 43 | NUMBER: FLOAT | INT 44 | SIGNED_NUMBER: ["+"|"-"] NUMBER 45 | 46 | UNESCAPED_STRING: /\"[^"]*\"/ 47 | 48 | // based on `outlines/fsm/json_schema.py` 49 | _NON_CONTROL_CHAR: /([^"\\\x00-\x1F\x7F-\x9F])/ 50 | _ESCAPED_CHAR: /\\/ (_NON_CONTROL_CHAR | /\\/ | /"/) 51 | ESCAPED_STRING_INNER: _NON_CONTROL_CHAR | _ESCAPED_CHAR 52 | ESCAPED_STRING: /"/ ESCAPED_STRING_INNER* /"/ 53 | 54 | 55 | 56 | // 57 | // Names (Variables) 58 | // 59 | LCASE_LETTER: "a".."z" 60 | UCASE_LETTER: "A".."Z" 61 | 62 | LETTER: UCASE_LETTER | LCASE_LETTER 63 | WORD: LETTER+ 64 | 65 | CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)* 66 | 67 | 68 | // 69 | // Whitespace 70 | // 71 | WS_INLINE: (" "|/\t/)+ 72 | WS: /[ \t\f\r\n]/+ 73 | 74 | CR : /\r/ 75 | LF : /\n/ 76 | NEWLINE: (CR? LF)+ 77 | 78 | 79 | // Comments 80 | SH_COMMENT: /#[^\n]*/ 81 | CPP_COMMENT: /\/\/[^\n]*/ 82 | C_COMMENT: "/*" /(.|\n)*?/ "*/" 83 | SQL_COMMENT: /--[^\n]*/ 84 | -------------------------------------------------------------------------------- /outlines/grammars/json.lark: -------------------------------------------------------------------------------- 1 | ?start: value 2 | 3 | ?value: object 4 | | array 5 | | ESCAPED_STRING 6 | | SIGNED_NUMBER -> number 7 | | "true" -> true 8 | | "false" -> false 9 | | "null" -> null 10 | 11 | array : "[" [value ("," value)*] "]" 12 | object : "{" [pair ("," pair)*] "}" 13 | pair : ESCAPED_STRING ":" value 14 | 15 | %import common.ESCAPED_STRING 16 | %import common.SIGNED_NUMBER 17 | %import common.WS 18 | 19 | %ignore WS 20 | -------------------------------------------------------------------------------- /outlines/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Module that contains all the models integrated in outlines. 2 | 3 | We group the models in submodules by provider instead of theme (completion, chat 4 | completion, diffusers, etc.) and use routing functions everywhere else in the 5 | codebase. 6 | 7 | """ 8 | 9 | from typing import Union 10 | 11 | from .exllamav2 import ExLlamaV2Model, exl2 12 | from .llamacpp import LlamaCpp, llamacpp 13 | from .mlxlm import MLXLM, mlxlm 14 | from .openai import OpenAI, azure_openai, openai 15 | from .transformers import Transformers, TransformerTokenizer, mamba, transformers 16 | from .transformers_vision import TransformersVision, transformers_vision 17 | from .vllm import VLLM, vllm 18 | 19 | LogitsGenerator = Union[Transformers, LlamaCpp, OpenAI, ExLlamaV2Model, MLXLM, VLLM] 20 | -------------------------------------------------------------------------------- /outlines/models/tokenizer.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Hashable, List, Protocol, Set, Tuple, Union 2 | 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | 6 | 7 | class Tokenizer(Hashable, Protocol): 8 | eos_token: str 9 | eos_token_id: int 10 | pad_token_id: int 11 | vocabulary: Dict[str, int] 12 | special_tokens: Set[str] 13 | 14 | def encode( 15 | self, prompt: Union[str, List[str]] 16 | ) -> Tuple[NDArray[np.int64], NDArray[np.int64]]: 17 | """Translate the input prompts into arrays of token ids and attention mask.""" 18 | ... 19 | 20 | def decode(self, token_ids: NDArray[np.int64]) -> List[str]: 21 | """Translate an array of token ids to a string or list of strings.""" 22 | ... 23 | 24 | def convert_token_to_string(self, token: str) -> str: 25 | """Convert a token to its equivalent string. 
26 | 27 | This is for instance useful for BPE tokenizers where whitespaces are 28 | represented by the special characted `Ġ`. This prevents matching a raw 29 | token that includes `Ġ` with a string. 30 | """ 31 | ... 32 | -------------------------------------------------------------------------------- /outlines/processors/__init__.py: -------------------------------------------------------------------------------- 1 | from .structured import ( 2 | CFGLogitsProcessor, 3 | GuideLogitsProcessor, 4 | JSONLogitsProcessor, 5 | OutlinesLogitsProcessor, 6 | RegexLogitsProcessor, 7 | ) 8 | -------------------------------------------------------------------------------- /outlines/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/outlines/py.typed -------------------------------------------------------------------------------- /outlines/serve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/outlines/serve/__init__.py -------------------------------------------------------------------------------- /outlines/types/__init__.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from . import airports, countries, locale 4 | from outlines.types.dsl import ( 5 | Regex, 6 | json_schema, 7 | regex, 8 | either, 9 | optional, 10 | exactly, 11 | at_least, 12 | at_most, 13 | between, 14 | one_or_more, 15 | zero_or_more, 16 | ) 17 | 18 | # Python types 19 | integer = Regex(r"[+-]?(0|[1-9][0-9]*)") 20 | boolean = Regex("(True|False)") 21 | number = Regex(rf"{integer.pattern}(\.[0-9]+)?([eE][+-][0-9]+)?") 22 | date = Regex(r"(\d{4})-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])") 23 | time = Regex(r"([0-1][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])") 24 | datetime = Regex(rf"({date.pattern})(\s)({time.pattern})") 25 | 26 | # Basic regex types 27 | digit = Regex(r"\d") 28 | char = Regex(r"\w") 29 | newline = Regex(r"(\r\n|\r|\n)") # Matched new lines on Linux, Windows & MacOS 30 | whitespace = Regex(r"\s") 31 | 32 | # Document-specific types 33 | sentence = Regex(r"[A-Z].*\s*[.!?]") 34 | paragraph = Regex(rf"{sentence.pattern}(?:\s+{sentence.pattern})*\n+") 35 | 36 | 37 | # The following regex is FRC 5322 compliant and was found at: 38 | # https://emailregex.com/ 39 | email = Regex( 40 | r"""(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])""" 41 | ) 42 | 43 | # Matches any ISBN number. Note that this is not completely correct as not all 44 | # 10 or 13 digits numbers are valid ISBNs. See https://en.wikipedia.org/wiki/ISBN 45 | # Taken from O'Reilly's Regular Expression Cookbook: 46 | # https://www.oreilly.com/library/view/regular-expressions-cookbook/9781449327453/ch04s13.html 47 | # 48 | # TODO: The check digit can only be computed by calling a function to compute it dynamically 49 | isbn = Regex( 50 | r"(?:ISBN(?:-1[03])?:? 
)?(?=[0-9X]{10}$|(?=(?:[0-9]+[- ]){3})[- 0-9X]{13}$|97[89][0-9]{10}$|(?=(?:[0-9]+[- ]){4})[- 0-9]{17}$)(?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]" 51 | ) 52 | -------------------------------------------------------------------------------- /outlines/types/airports.py: -------------------------------------------------------------------------------- 1 | """Generate valid airport codes.""" 2 | 3 | from enum import Enum 4 | 5 | import airportsdata 6 | 7 | AIRPORT_IATA_LIST = [ 8 | (v["iata"], v["iata"]) for v in airportsdata.load().values() if v["iata"] 9 | ] 10 | IATA = Enum("Airport", AIRPORT_IATA_LIST) # type:ignore 11 | -------------------------------------------------------------------------------- /outlines/types/countries.py: -------------------------------------------------------------------------------- 1 | """Generate valid country codes and names.""" 2 | 3 | from enum import Enum 4 | 5 | from iso3166 import countries 6 | 7 | 8 | def get_country_flags(): 9 | """Generate Unicode flags for all ISO 3166-1 alpha-2 country codes in Alpha2 Enum.""" 10 | base = ord("🇦") 11 | return { 12 | code.name: chr(base + ord(code.name[0]) - ord("A")) 13 | + chr(base + ord(code.name[1]) - ord("A")) 14 | for code in Alpha2 15 | } 16 | 17 | 18 | ALPHA_2_CODE = [(country.alpha2, country.alpha2) for country in countries] 19 | Alpha2 = Enum("Alpha_2", ALPHA_2_CODE) # type:ignore 20 | 21 | ALPHA_3_CODE = [(country.alpha3, country.alpha3) for country in countries] 22 | Alpha3 = Enum("Alpha_3", ALPHA_3_CODE) # type:ignore 23 | 24 | NUMERIC_CODE = [(str(country.numeric), str(country.numeric)) for country in countries] 25 | Numeric = Enum("Numeric_code", NUMERIC_CODE) # type:ignore 26 | 27 | NAME = [(country.name, country.name) for country in countries] 28 | Name = Enum("Name", NAME) # type:ignore 29 | 30 | flag_mapping = get_country_flags() 31 | FLAG = [(flag, flag) for code, flag in flag_mapping.items()] 32 | Flag = Enum("Flag", FLAG) # type:ignore 33 | -------------------------------------------------------------------------------- /outlines/types/locale/__init__.py: -------------------------------------------------------------------------------- 1 | from . import us 2 | -------------------------------------------------------------------------------- /outlines/types/locale/us.py: -------------------------------------------------------------------------------- 1 | from outlines.types.dsl import Regex 2 | 3 | zip_code = Regex(r"\d{5}(?:-\d{4})?") 4 | phone_number = Regex(r"(\([0-9]{3}\) |[0-9]{3}-)[0-9]{3}-[0-9]{4}") 5 | -------------------------------------------------------------------------------- /requirements-doc.txt: -------------------------------------------------------------------------------- 1 | mkdocs 2 | mkdocs-material 3 | mkdocs-material[imaging] 4 | mkdocs-mermaid2-plugin 5 | mkdocs-section-index 6 | mkdocstrings[python] 7 | mkdocs-git-committers-plugin-2 8 | mkdocs-git-revision-date-localized-plugin 9 | mike 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | select = C,E,F,W 4 | ignore = E203,E231,E501,E741,W503,W504,C901,E731 5 | per-file-ignores = 6 | **/__init__.py:F401,F403 7 | exclude = 8 | normalai/_version.py 9 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | { pkgs ? 
import { config = { allowUnfree = true; }; } }: 2 | 3 | (pkgs.buildFHSEnv { 4 | name = "dottxt-ai"; 5 | targetPkgs = pkgs: 6 | with pkgs; [ 7 | autoconf 8 | binutils 9 | cmake 10 | cudatoolkit 11 | curl 12 | freeglut 13 | gcc13 14 | git 15 | gitRepo 16 | gnumake 17 | gnupg 18 | gperf 19 | libGL 20 | libGLU 21 | linuxPackages.nvidia_x11 22 | m4 23 | ncurses5 24 | procps 25 | python311 26 | stdenv.cc 27 | unzip 28 | util-linux 29 | uv 30 | xorg.libX11 31 | xorg.libXext 32 | xorg.libXi 33 | xorg.libXmu 34 | xorg.libXrandr 35 | xorg.libXv 36 | zlib 37 | ]; 38 | 39 | multiPkgs = pkgs: with pkgs; [ zlib ]; 40 | 41 | runScript = "bash"; 42 | 43 | profile = '' 44 | # CUDA paths 45 | export CUDA_HOME=${pkgs.cudatoolkit} 46 | export CUDA_PATH=${pkgs.cudatoolkit} 47 | 48 | # Ensure proper binary paths are included 49 | export PATH=${pkgs.gcc13}/bin:${pkgs.cudatoolkit}/bin:$PATH 50 | 51 | # Set library paths, including additional directories for CUPTI 52 | export LD_LIBRARY_PATH=${pkgs.cudatoolkit}/lib64:${pkgs.cudatoolkit}/extras/CUPTI/lib64:${pkgs.linuxPackages.nvidia_x11}/lib:$LD_LIBRARY_PATH 53 | 54 | # Add static library paths to EXTRA_LDFLAGS for the linker 55 | export EXTRA_LDFLAGS="-L${pkgs.cudatoolkit}/lib64 -L${pkgs.cudatoolkit}/extras/CUPTI/lib64 -L${pkgs.linuxPackages.nvidia_x11}/lib -L${pkgs.cudatoolkit}/libdevice $EXTRA_LDFLAGS" 56 | export EXTRA_CCFLAGS="-I${pkgs.cudatoolkit}/include $EXTRA_CCFLAGS" 57 | 58 | # Set CMake paths 59 | export CMAKE_PREFIX_PATH=${pkgs.cudatoolkit}:${pkgs.linuxPackages.nvidia_x11}:$CMAKE_PREFIX_PATH 60 | 61 | # C++ and CC flags 62 | export CXXFLAGS="--std=c++17 $EXTRA_CCFLAGS" 63 | export CC=${pkgs.gcc13}/bin/gcc 64 | export CXX=${pkgs.gcc13}/bin/g++ 65 | 66 | # NVCC flags to use the right compiler 67 | export NVCC_FLAGS="-ccbin ${pkgs.gcc13}/bin/gcc" 68 | ''; 69 | 70 | structuredAttrs__ = { 71 | stdenv = pkgs.stdenv.overrideCC pkgs.stdenv.cc pkgs.gcc13; 72 | }; 73 | }).env 74 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/tests/__init__.py -------------------------------------------------------------------------------- /tests/cfg_samples/arithmetic/lots_of_ops.arithmetic.test: -------------------------------------------------------------------------------- 1 | 5+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 2 | -------------------------------------------------------------------------------- /tests/cfg_samples/arithmetic/simple_math.arithmetic.test: -------------------------------------------------------------------------------- 1 | (1 * 2) - (0.1 * 2 * 9.42) 2 | -------------------------------------------------------------------------------- /tests/cfg_samples/json/simple_fruit.json.test: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "ID": "1", 4 | "Name": "Andrew \"The Escaper\" Lapp", 5 | "Age": "30", 6 | "FavFruit": "Banana" 7 | }, 8 | { 9 | "ID": "2", 10 | "Name": "Mohammad", 11 | "Age": "40", 12 | "FavFruit": "\"Any Fruit As Long as It's In Quotes!\"" 13 | }, 14 | { 15 | "ID": "3", 16 | "Name": "Alice", 17 | "Age": "61", 18 | "FavFruit": "Peaches, but only \n newline separated peaches" 19 | } 20 | ] 21 | -------------------------------------------------------------------------------- /tests/cfg_samples/json/simple_fruit_no_indent.json.test: 
-------------------------------------------------------------------------------- 1 | [{"ID": "1", "Name": "Andrew", "Age": "30", "FavFruit": "Banana"}, {"ID": "2", "Name": "Mohammad", "Age": "40", "FavFruit": "Apple"}, {"ID": "3", "Name": "Alice", "Age": "61", "FavFruit": "Peach"}] 2 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import pytest 4 | 5 | 6 | def pytest_collection_modifyitems(config, items): 7 | if sys.platform != "linux": 8 | if not config.option.keyword or ( 9 | config.option.keyword and "test_integration_vllm" in config.option.keyword 10 | ): 11 | print( 12 | "WARNING: test_integration_vllm tests are skipped because vLLM only supports Linux platform (including WSL)." 13 | ) 14 | skip_vllm = pytest.mark.skip(reason="vLLM models can only be run on Linux.") 15 | for item in items: 16 | if "test_integration_vllm" in item.nodeid: 17 | item.add_marker(skip_vllm) 18 | -------------------------------------------------------------------------------- /tests/fsm/test_json_schema.py: -------------------------------------------------------------------------------- 1 | import json 2 | from contextlib import nullcontext 3 | from enum import Enum 4 | from functools import partial 5 | from typing import List 6 | 7 | import pytest 8 | from outlines_core.fsm.json_schema import build_regex_from_schema 9 | from pydantic import BaseModel, constr 10 | 11 | from outlines.fsm.json_schema import get_schema_from_enum, get_schema_from_signature 12 | 13 | 14 | def test_function_basic(): 15 | def test_function(foo: str, bar: List[int]): 16 | pass 17 | 18 | result = get_schema_from_signature(test_function) 19 | assert result["type"] == "object" 20 | assert list(result["properties"].keys()) == ["foo", "bar"] 21 | assert result["properties"]["foo"]["type"] == "string" 22 | assert result["properties"]["bar"]["type"] == "array" 23 | assert result["properties"]["bar"]["items"]["type"] == "integer" 24 | 25 | 26 | def test_function_no_type(): 27 | def test_function(foo, bar: List[int]): 28 | pass 29 | 30 | with pytest.raises(ValueError): 31 | get_schema_from_signature(test_function) 32 | 33 | 34 | def test_from_pydantic(): 35 | class User(BaseModel): 36 | user_id: int 37 | name: str 38 | maxlength_name: constr(max_length=10) 39 | minlength_name: constr(min_length=10) 40 | value: float 41 | is_true: bool 42 | 43 | schema = json.dumps(User.model_json_schema()) 44 | regex_str = build_regex_from_schema(schema) 45 | assert isinstance(regex_str, str) 46 | 47 | 48 | def add(a: float, b: float) -> float: 49 | return a + b 50 | 51 | 52 | class MyEnum(Enum): 53 | add = partial(add) 54 | a = "a" 55 | b = 2 56 | 57 | 58 | # if you don't register your function as callable, you will get an empty enum 59 | class EmptyEnum(Enum): 60 | add = add 61 | 62 | 63 | @pytest.mark.parametrize( 64 | "enum,expectation", 65 | [ 66 | (MyEnum, nullcontext()), 67 | (EmptyEnum, pytest.raises(ValueError)), 68 | ], 69 | ) 70 | def test_enum_schema(enum, expectation): 71 | with expectation: 72 | schema = get_schema_from_enum(enum) 73 | regex_str = build_regex_from_schema(json.dumps(schema)) 74 | assert isinstance(regex_str, str) 75 | assert schema["title"] == enum.__name__ 76 | assert len(schema["oneOf"]) == len(enum) 77 | for elt in schema["oneOf"]: 78 | assert type(elt) in [int, float, bool, type(None), str, dict] 79 | 
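# Note: EmptyEnum raises because a plain function assigned in an Enum body becomes a
# method rather than a member, so the resulting enum has no members; wrapping the
# function in functools.partial (as MyEnum.add does) keeps it as an enum member.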
-------------------------------------------------------------------------------- /tests/fsm/test_types.py: -------------------------------------------------------------------------------- 1 | import datetime as pydatetime 2 | 3 | import pytest 4 | 5 | from outlines.fsm.types import python_types_to_regex 6 | from outlines import types 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "python_type,custom_type", 11 | [ 12 | (int, types.integer), 13 | (float, types.number), 14 | (bool, types.boolean), 15 | (pydatetime.date, types.date), 16 | (pydatetime.time, types.time), 17 | (pydatetime.datetime, types.datetime), 18 | ], 19 | ) 20 | def test_python_types(python_type, custom_type): 21 | test_regex, _ = python_types_to_regex(python_type) 22 | assert custom_type.pattern == test_regex.pattern 23 | -------------------------------------------------------------------------------- /tests/generate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/tests/generate/__init__.py -------------------------------------------------------------------------------- /tests/generate/conftest.py: -------------------------------------------------------------------------------- 1 | from importlib import reload 2 | 3 | import pytest 4 | import torch 5 | 6 | 7 | def is_metal_available(): 8 | try: 9 | import mlx.core as mx 10 | import mlx_lm # noqa: F401 11 | 12 | assert mx.metal.is_available() 13 | except (ImportError, AssertionError): 14 | return False 15 | return True 16 | 17 | 18 | def pytest_collection_modifyitems(config, items): 19 | """ 20 | If mlxlm and Metal aren't available, skip mlxlm tests 21 | If CUDA isn't available, skip vllm and transformers_vision 22 | """ 23 | if not torch.cuda.is_available(): 24 | skip_marker = pytest.mark.skip( 25 | reason="Skipping test because CUDA is not available" 26 | ) 27 | for item in items: 28 | if "model_fixture" in item.fixturenames: 29 | model_param = item.callspec.params.get("model_fixture", None) 30 | if ( 31 | model_param.startswith("model_transformers_vision") 32 | or model_param.startswith("model_vllm") 33 | or model_param.startswith("model_exllamav2") 34 | ): 35 | item.add_marker(skip_marker) 36 | 37 | if not is_metal_available(): 38 | skip_marker = pytest.mark.skip( 39 | reason="Skipping test because mlx-lm or Metal are not available" 40 | ) 41 | for item in items: 42 | if "model_fixture" in item.fixturenames: 43 | model_param = item.callspec.params.get("model_fixture", None) 44 | if model_param.startswith("model_mlxlm"): 45 | item.add_marker(skip_marker) 46 | 47 | 48 | @pytest.fixture 49 | def temp_cache_dir(): 50 | import os 51 | import tempfile 52 | 53 | import outlines.caching 54 | import outlines.fsm.guide 55 | 56 | with tempfile.TemporaryDirectory() as tempdir: 57 | os.environ["OUTLINES_CACHE_DIR"] = tempdir 58 | outlines.caching.get_cache.cache_clear() 59 | reload(outlines) 60 | reload(outlines.fsm.guide) 61 | cache_status = outlines.caching._caching_enabled 62 | try: 63 | outlines.caching._caching_enabled = True 64 | yield 65 | finally: 66 | outlines.caching._caching_enabled = cache_status 67 | -------------------------------------------------------------------------------- /tests/generate/test_api.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from urllib.request import urlopen 3 | 4 | import pytest 5 | from PIL import Image # type: ignore 6 | 7 | from 
outlines.generate.api import VisionSequenceGeneratorAdapter 8 | 9 | IMG_URI = "https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg" 10 | PIL_IMG = Image.open(BytesIO(urlopen(IMG_URI).read())).convert("RGB") 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "prompts,media,type_error", 15 | [ 16 | ("single prompt", [PIL_IMG], False), 17 | (["prompt0", "prompt1"], [[PIL_IMG], [PIL_IMG]], False), 18 | ("single prompt", [PIL_IMG, PIL_IMG], False), 19 | (["prompt0", "prompt1"], [[PIL_IMG, PIL_IMG], [PIL_IMG]], False), 20 | ("single prompt", "this isn't an image, it's a string", True), 21 | ("single prompt", PIL_IMG, True), 22 | (["prompt0", "prompt1"], [PIL_IMG], True), 23 | (["prompt0", "prompt1"], [[PIL_IMG]], True), 24 | (["prompt0", "prompt1"], [[[PIL_IMG]], [[PIL_IMG]]], True), 25 | ], 26 | ) 27 | def test_vision_sequence_generator_validate_types(prompts, media, type_error): 28 | """Ensure inputs are validated correctly""" 29 | if type_error: 30 | with pytest.raises(TypeError): 31 | VisionSequenceGeneratorAdapter._validate_prompt_media_types(prompts, media) 32 | else: 33 | VisionSequenceGeneratorAdapter._validate_prompt_media_types(prompts, media) 34 | -------------------------------------------------------------------------------- /tests/generate/test_integration_transformers_vision.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from urllib.request import urlopen 3 | 4 | import pytest 5 | from PIL import Image 6 | from transformers import AutoProcessor, LlavaForConditionalGeneration 7 | 8 | import outlines 9 | from outlines.models.transformers_vision import transformers_vision 10 | 11 | IMAGE_URLS = [ 12 | "https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg", 13 | "https://upload.wikimedia.org/wikipedia/commons/7/71/2010-kodiak-bear-1.jpg", 14 | "https://upload.wikimedia.org/wikipedia/commons/b/be/Tamias-rufus-001.jpg", 15 | ] 16 | 17 | 18 | def img_from_url(url): 19 | img_byte_stream = BytesIO(urlopen(url).read()) 20 | return Image.open(img_byte_stream).convert("RGB") 21 | 22 | 23 | @pytest.fixture(scope="session") 24 | def model(tmp_path_factory): 25 | return transformers_vision( 26 | "trl-internal-testing/tiny-LlavaForConditionalGeneration", 27 | model_class=LlavaForConditionalGeneration, 28 | device="cpu", 29 | ) 30 | 31 | 32 | @pytest.fixture(scope="session") 33 | def processor(tmp_path_factory): 34 | return AutoProcessor.from_pretrained("llava-hf/llava-interleave-qwen-0.5b-hf") 35 | 36 | 37 | def test_single_image_text_gen(model, processor): 38 | conversation = [ 39 | { 40 | "role": "user", 41 | "content": [{"type": "text", "text": "What is this?"}, {"type": "image"}], 42 | }, 43 | ] 44 | generator = outlines.generate.text(model) 45 | sequence = generator( 46 | processor.apply_chat_template(conversation), 47 | [img_from_url(IMAGE_URLS[0])], 48 | seed=10000, 49 | max_tokens=10, 50 | ) 51 | assert isinstance(sequence, str) 52 | 53 | 54 | def test_multi_image_text_gen(model, processor): 55 | """If the length of image tags and number of images we pass are > 1 and equal, 56 | we should yield a successful generation. 
57 | """ 58 | conversation = [ 59 | { 60 | "role": "user", 61 | "content": [ 62 | {"type": "text", "text": "What do all these have in common?"}, 63 | ] 64 | + [{"type": "image"} for _ in range(len(IMAGE_URLS))], 65 | }, 66 | ] 67 | generator = outlines.generate.text(model) 68 | sequence = generator( 69 | processor.apply_chat_template(conversation), 70 | [img_from_url(i) for i in IMAGE_URLS], 71 | seed=10000, 72 | max_tokens=10, 73 | ) 74 | assert isinstance(sequence, str) 75 | 76 | 77 | def test_mismatched_image_text_gen(model, processor): 78 | """If the length of image tags and number of images we pass are unequal, 79 | we should raise an error. 80 | """ 81 | conversation = [ 82 | { 83 | "role": "user", 84 | "content": [ 85 | {"type": "text", "text": "I'm passing 3 images, but only 1 image tag"}, 86 | {"type": "image"}, 87 | ], 88 | }, 89 | ] 90 | generator = outlines.generate.text(model) 91 | with pytest.raises(ValueError): 92 | _ = generator( 93 | processor.apply_chat_template(conversation), 94 | [img_from_url(i) for i in IMAGE_URLS], 95 | seed=10000, 96 | max_tokens=10, 97 | ) 98 | 99 | 100 | def test_single_image_choice(model, processor): 101 | conversation = [ 102 | { 103 | "role": "user", 104 | "content": [{"type": "text", "text": "What is this?"}, {"type": "image"}], 105 | }, 106 | ] 107 | choices = ["cat", "dog"] 108 | generator = outlines.generate.choice(model, choices) 109 | sequence = generator( 110 | processor.apply_chat_template(conversation), 111 | [img_from_url(IMAGE_URLS[0])], 112 | seed=10000, 113 | max_tokens=10, 114 | ) 115 | assert isinstance(sequence, str) 116 | assert sequence in choices 117 | -------------------------------------------------------------------------------- /tests/models/test_mlxlm.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from outlines.models.mlxlm import mlxlm 4 | from outlines.models.transformers import TransformerTokenizer 5 | 6 | try: 7 | import mlx.core as mx 8 | 9 | HAS_MLX = mx.metal.is_available() 10 | except ImportError: 11 | HAS_MLX = False 12 | 13 | 14 | TEST_MODEL = "mlx-community/SmolLM-135M-Instruct-4bit" 15 | 16 | 17 | @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") 18 | def test_mlxlm_model(): 19 | model = mlxlm(TEST_MODEL) 20 | assert hasattr(model, "model") 21 | assert hasattr(model, "tokenizer") 22 | assert isinstance(model.tokenizer, TransformerTokenizer) 23 | 24 | 25 | @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") 26 | def test_mlxlm_tokenizer(): 27 | model = mlxlm(TEST_MODEL) 28 | 29 | # Test single string encoding/decoding 30 | test_text = "Hello, world!" 
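# mlx_tokenizer.encode is assumed here to return a plain Python list of token ids;
# wrapping it in mx.array converts it to an MLX array for the assertion below.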
31 | token_ids = mx.array(model.mlx_tokenizer.encode(test_text)) 32 | assert isinstance(token_ids, mx.array) 33 | 34 | 35 | @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") 36 | def test_mlxlm_generate(): 37 | from outlines.generate.api import GenerationParameters, SamplingParameters 38 | 39 | model = mlxlm(TEST_MODEL) 40 | prompt = "Write a haiku about programming:" 41 | 42 | # Test with basic generation parameters 43 | gen_params = GenerationParameters(max_tokens=50, stop_at=None, seed=None) 44 | 45 | # Test with different sampling parameters 46 | sampling_params = SamplingParameters( 47 | sampler="multinomial", num_samples=1, top_p=0.9, top_k=None, temperature=0.7 48 | ) 49 | 50 | # Test generation 51 | output = model.generate(prompt, gen_params, None, sampling_params) 52 | assert isinstance(output, str) 53 | assert len(output) > 0 54 | 55 | 56 | @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") 57 | def test_mlxlm_stream(): 58 | from outlines.generate.api import GenerationParameters, SamplingParameters 59 | 60 | model = mlxlm(TEST_MODEL) 61 | prompt = "Count from 1 to 5:" 62 | 63 | gen_params = GenerationParameters(max_tokens=20, stop_at=None, seed=None) 64 | 65 | sampling_params = SamplingParameters( 66 | sampler="greedy", # Use greedy sampling for deterministic output 67 | num_samples=1, 68 | top_p=None, 69 | top_k=None, 70 | temperature=0.0, 71 | ) 72 | 73 | # Test streaming 74 | stream = model.stream(prompt, gen_params, None, sampling_params) 75 | tokens = list(stream) 76 | assert len(tokens) > 0 77 | assert all(isinstance(token, str) for token in tokens) 78 | 79 | # Test that concatenated streaming output matches generate output 80 | streamed_text = "".join(tokens) 81 | generated_text = model.generate(prompt, gen_params, None, sampling_params) 82 | assert streamed_text == generated_text 83 | 84 | 85 | @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") 86 | def test_mlxlm_errors(): 87 | model = mlxlm(TEST_MODEL) 88 | 89 | # Test batch inference (should raise NotImplementedError) 90 | with pytest.raises(NotImplementedError): 91 | from outlines.generate.api import GenerationParameters, SamplingParameters 92 | 93 | gen_params = GenerationParameters(max_tokens=10, stop_at=None, seed=None) 94 | sampling_params = SamplingParameters("multinomial", 1, None, None, 1.0) 95 | model.generate(["prompt1", "prompt2"], gen_params, None, sampling_params) 96 | 97 | # Test beam search (should raise NotImplementedError) 98 | with pytest.raises(NotImplementedError): 99 | sampling_params = SamplingParameters("beam_search", 1, None, None, 1.0) 100 | model.generate("test prompt", gen_params, None, sampling_params) 101 | -------------------------------------------------------------------------------- /tests/models/test_tokenizer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from outlines.models.tokenizer import Tokenizer 4 | 5 | 6 | def test_tokenizer(): 7 | with pytest.raises(TypeError, match="instantiate abstract"): 8 | Tokenizer() 9 | -------------------------------------------------------------------------------- /tests/processors/test_base_processor.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import jax.numpy as jnp 4 | import numpy as np 5 | import pytest 6 | import torch 7 | 8 | from outlines.processors.base_logits_processor import OutlinesLogitsProcessor 9 | 10 | arrays = { 11 | 
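# Each entry below holds the same 2x2 matrix, expressed in a different array library's native type.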
"list": [[1.0, 2.0], [3.0, 4.0]], 12 | "np": np.array([[1, 2], [3, 4]], dtype=np.float32), 13 | "jax": jnp.array([[1, 2], [3, 4]], dtype=jnp.float32), 14 | "torch": torch.tensor([[1, 2], [3, 4]], dtype=torch.float32), 15 | } 16 | 17 | try: 18 | import mlx.core as mx 19 | 20 | arrays["mlx"] = mx.array([[1, 2], [3, 4]], dtype=mx.float32) 21 | arrays["mlx_bfloat16"] = mx.array([[1, 2], [3, 4]], dtype=mx.bfloat16) 22 | except ImportError: 23 | pass 24 | 25 | try: 26 | import jax.numpy as jnp 27 | 28 | arrays["jax"] = jnp.array([[1, 2], [3, 4]], dtype=jnp.float32) 29 | except ImportError: 30 | pass 31 | 32 | 33 | # Mock implementation of the abstract class for testing 34 | class MockLogitsProcessor(OutlinesLogitsProcessor): 35 | def process_logits( 36 | self, input_ids: List[List[int]], logits: torch.Tensor 37 | ) -> torch.Tensor: 38 | # For testing purposes, let's just return logits multiplied by 2 39 | return logits * 2 40 | 41 | 42 | @pytest.fixture 43 | def processor(): 44 | """Fixture for creating an instance of the MockLogitsProcessor.""" 45 | return MockLogitsProcessor() 46 | 47 | 48 | @pytest.mark.parametrize("array_type", arrays.keys()) 49 | def test_to_torch(array_type, processor): 50 | data = arrays[array_type] 51 | torch_tensor = processor._to_torch(data) 52 | assert isinstance(torch_tensor, torch.Tensor) 53 | assert torch.allclose( 54 | torch_tensor.cpu(), torch.tensor([[1, 2], [3, 4]], dtype=torch.float32) 55 | ) 56 | 57 | 58 | @pytest.mark.parametrize("array_type", arrays.keys()) 59 | def test_from_torch(array_type, processor): 60 | torch_tensor = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32) 61 | data = processor._from_torch(torch_tensor, type(arrays[array_type])) 62 | assert isinstance(data, type(arrays[array_type])) 63 | if array_type == "mlx_bfloat16": 64 | # For bfloat16, we expect the output to be float32 due to the conversion 65 | assert data.dtype == mx.float32 66 | assert np.allclose(np.array(data), np.array([[1, 2], [3, 4]], dtype=np.float32)) 67 | else: 68 | assert np.allclose(data, arrays[array_type]) 69 | 70 | 71 | @pytest.mark.parametrize("array_type", arrays.keys()) 72 | def test_call(array_type, processor): 73 | input_ids = arrays[array_type] 74 | logits = arrays[array_type] 75 | processed_logits = processor(input_ids, logits) 76 | 77 | assert isinstance(processed_logits, type(arrays[array_type])) 78 | assert np.allclose( 79 | np.array(processed_logits), np.array([[2.0, 4.0], [6.0, 8.0]], dtype=np.float32) 80 | ) 81 | -------------------------------------------------------------------------------- /tests/test_function.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import responses 3 | from pydantic import BaseModel 4 | from requests.exceptions import HTTPError 5 | 6 | import outlines 7 | from outlines.function import Function, download_from_github, extract_function_from_file 8 | 9 | 10 | def test_function_basic(): 11 | with pytest.deprecated_call(match="The @prompt decorator"): 12 | 13 | @outlines.prompt 14 | def test_template(text: str): 15 | """{{ text }}""" 16 | 17 | class Foo(BaseModel): 18 | id: int 19 | 20 | fn = Function(test_template, Foo, "hf-internal-testing/tiny-random-GPTJForCausalLM") 21 | 22 | assert fn.generator is None 23 | 24 | result = fn("test") 25 | assert isinstance(result, BaseModel) 26 | 27 | 28 | def test_download_from_github_invalid(): 29 | with pytest.raises(ValueError, match="Please provide"): 30 | download_from_github("outlines/program") 31 | 32 | with 
pytest.raises(ValueError, match="Do not append"): 33 | download_from_github("dottxt-ai/outlines/program.py") 34 | 35 | 36 | @responses.activate 37 | def test_download_from_github_success(): 38 | responses.add( 39 | responses.GET, 40 | "https://raw.githubusercontent.com/dottxt-ai/outlines/main/program.py", 41 | body="import outlines\n", 42 | status=200, 43 | ) 44 | 45 | file = download_from_github("dottxt-ai/outlines/program") 46 | assert file == "import outlines\n" 47 | 48 | responses.add( 49 | responses.GET, 50 | "https://raw.githubusercontent.com/dottxt-ai/outlines/main/foo/bar/program.py", 51 | body="import outlines\n", 52 | status=200, 53 | ) 54 | 55 | file = download_from_github("dottxt-ai/outlines/foo/bar/program") 56 | assert file == "import outlines\n" 57 | 58 | 59 | @responses.activate 60 | def test_download_from_github_error(): 61 | responses.add( 62 | responses.GET, 63 | "https://raw.githubusercontent.com/foo/bar/main/program.py", 64 | json={"error": "not found"}, 65 | status=404, 66 | ) 67 | 68 | with pytest.raises(ValueError, match="Program could not be found at"): 69 | download_from_github("foo/bar/program") 70 | 71 | responses.add( 72 | responses.GET, 73 | "https://raw.githubusercontent.com/foo/bar/main/program.py", 74 | json={"error": "Internal Server Error"}, 75 | status=500, 76 | ) 77 | 78 | with pytest.raises(HTTPError, match="500 Server Error"): 79 | download_from_github("foo/bar/program") 80 | 81 | 82 | def test_extract_function_from_file(): 83 | content = """ 84 | import outlines 85 | from pydantic import BaseModel 86 | 87 | model = "gpt2" 88 | 89 | 90 | @outlines.prompt 91 | def prompt(): 92 | '''Hello''' 93 | 94 | 95 | class User(BaseModel): 96 | id: int 97 | name: str 98 | 99 | 100 | function = outlines.Function( 101 | prompt, 102 | User, 103 | "gpt2", 104 | ) 105 | """ 106 | 107 | with pytest.deprecated_call(match="The @prompt decorator"): 108 | fn = extract_function_from_file(content, "function") 109 | assert ( 110 | str(type(fn)) == "" 111 | ) # because imported via `exec` 112 | 113 | 114 | def test_extract_function_from_file_no_function(): 115 | content = """ 116 | import outlines 117 | from pydantic import BaseModel 118 | 119 | @outlines.prompt 120 | def prompt(): 121 | '''Hello''' 122 | 123 | 124 | class User(BaseModel): 125 | id: int 126 | name: str 127 | 128 | program = outlines.Function( 129 | prompt, 130 | User, 131 | "gpt2", 132 | ) 133 | """ 134 | 135 | with pytest.deprecated_call(match="The @prompt decorator"): 136 | with pytest.raises(AttributeError, match="Could not find"): 137 | extract_function_from_file(content, "function") 138 | -------------------------------------------------------------------------------- /tests/test_grammars.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import outlines.grammars as grammars 4 | from outlines.fsm.guide import CFGGuide 5 | 6 | 7 | @pytest.mark.parametrize("grammar", [grammars.json, grammars.arithmetic]) 8 | def test_grammar_module(grammar): 9 | class MockTokenizer: 10 | vocabulary = {"(": 1, ")": 2, "a": 3, "eos": 4} 11 | special_tokens = {"eos"} 12 | eos_token = "eos" 13 | eos_token_id = 4 14 | 15 | def convert_token_to_string(self, token): 16 | return token 17 | 18 | @property 19 | def inverse_vocabulary(self): 20 | return {v: k for k, v in self.vocabulary.items()} 21 | 22 | def decode(self, token_ids): 23 | return [self.inverse_vocabulary[t] for t in token_ids] 24 | 25 | cfg_str = """ 26 | start: s 27 | s: "(" s ")" | /a+/ 28 | """ 29 | 
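# The toy grammar above accepts strings such as "a", "aaa", "(a)" and "((aa))":
# one or more "a" characters, optionally wrapped in nested balanced parentheses.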
tokenizer = MockTokenizer() 30 | fsm = CFGGuide(cfg_str, tokenizer) 31 | assert isinstance(fsm, CFGGuide) 32 | -------------------------------------------------------------------------------- /tests/types/test_to_regex.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | from outlines.types.dsl import ( 5 | String, 6 | Regex, 7 | JsonSchema, 8 | KleeneStar, 9 | KleenePlus, 10 | QuantifyBetween, 11 | QuantifyExact, 12 | QuantifyMaximum, 13 | QuantifyMinimum, 14 | Sequence, 15 | Alternatives, 16 | Optional, 17 | Term, 18 | to_regex, 19 | ) 20 | 21 | 22 | def test_to_regex_simple(): 23 | a = String("a") 24 | assert to_regex(a) == "a" 25 | assert a.matches("a") is True 26 | 27 | a = Regex("[0-9]") 28 | assert to_regex(a) == "([0-9])" 29 | assert a.matches(0) is True 30 | assert a.matches(10) is False 31 | assert a.matches("a") is False 32 | 33 | a = JsonSchema({"type": "integer"}) 34 | assert to_regex(a) == r"((-)?(0|[1-9][0-9]*))" 35 | assert a.matches(1) is True 36 | assert a.matches("1") is True 37 | assert a.matches("a") is False 38 | 39 | a = Optional(String("a")) 40 | assert to_regex(a) == "(a)?" 41 | assert a.matches("") is True 42 | assert a.matches("a") is True 43 | 44 | a = KleeneStar(String("a")) 45 | assert to_regex(a) == "(a)*" 46 | assert a.matches("") is True 47 | assert a.matches("a") is True 48 | assert a.matches("aaaaa") is True 49 | 50 | a = KleenePlus(String("a")) 51 | assert to_regex(a) == "(a)+" 52 | assert a.matches("") is False 53 | assert a.matches("a") is True 54 | assert a.matches("aaaaa") is True 55 | 56 | a = QuantifyExact(String("a"), 2) 57 | assert to_regex(a) == "(a){2}" 58 | assert a.matches("a") is False 59 | assert a.matches("aa") is True 60 | assert a.matches("aaa") is False 61 | 62 | a = QuantifyMinimum(String("a"), 2) 63 | assert to_regex(a) == "(a){2,}" 64 | assert a.matches("a") is False 65 | assert a.matches("aa") is True 66 | assert a.matches("aaa") is True 67 | 68 | a = QuantifyMaximum(String("a"), 2) 69 | assert to_regex(a) == "(a){,2}" 70 | assert a.matches("aa") is True 71 | assert a.matches("aaa") is False 72 | 73 | a = QuantifyBetween(String("a"), 1, 2) 74 | assert to_regex(a) == "(a){1,2}" 75 | assert a.matches("") is False 76 | assert a.matches("a") is True 77 | assert a.matches("aa") is True 78 | assert a.matches("aaa") is False 79 | 80 | with pytest.raises(TypeError, match="Cannot convert"): 81 | to_regex(Term()) 82 | 83 | 84 | def test_to_regex_combinations(): 85 | a = Sequence([Regex("dog|cat"), String("fish")]) 86 | assert to_regex(a) == "(dog|cat)fish" 87 | --------------------------------------------------------------------------------