├── .devcontainer └── devcontainer.json ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── config.yml ├── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md ├── scripts │ └── build_sdist_and_wheel.sh └── workflows │ ├── asv_benchmark_pr.yml │ ├── build_documentation.yml │ ├── publish_documentation.yml │ ├── release_docker.yml │ ├── release_pypi.yaml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pydocstyle ├── .readthedocs.yaml ├── .vscode └── settings.json ├── Dockerfile ├── LICENSE ├── README.md ├── benchmarks ├── __init__.py ├── asv.conf.json ├── bench_cfg_guide.py ├── bench_json_schema.py ├── bench_processors.py ├── bench_regex_guide.py └── common.py ├── docs ├── api │ ├── guide.md │ ├── index.md │ ├── json_schema.md │ ├── models.md │ ├── parsing.md │ ├── regex.md │ ├── samplers.md │ └── templates.md ├── assets │ └── images │ │ ├── dottxt-dark.svg │ │ ├── dottxt-light.svg │ │ ├── dottxt.png │ │ ├── logits_processing_diagram.svg │ │ ├── logo-dark-mode.png │ │ ├── logo-dark-mode.svg │ │ ├── logo-light-mode.png │ │ ├── logo-light-mode.svg │ │ ├── logo-simple.png │ │ ├── logo-square.png │ │ ├── logo-square.svg │ │ └── normal_computing.jpg ├── blog │ ├── .authors.yml │ ├── assets │ │ └── 4000_stars.png │ ├── index.md │ └── posts │ │ └── roadmap-2024.md ├── community │ ├── belonging.png │ ├── contribute.md │ ├── examples.md │ ├── feedback.md │ ├── index.md │ └── versioning.md ├── cookbook │ ├── atomic_caption.md │ ├── chain_of_density.md │ ├── chain_of_thought.md │ ├── classification.md │ ├── dating_profiles.md │ ├── deploy-using-bentoml.md │ ├── deploy-using-cerebrium.md │ ├── deploy-using-modal.md │ ├── earnings-reports.md │ ├── extract_event_details.md │ ├── extract_event_details.py │ ├── extraction.md │ ├── images │ │ ├── chain_of_density.png │ │ ├── coding_structure_diagram.png │ │ ├── knowledge-graph-extraction.png │ │ ├── nvidia-income.png │ │ ├── simtom.png │ │ └── trader-joes-receipt.jpg │ ├── index.md │ ├── knowledge_graph_extraction.md │ ├── models_playing_chess.md │ ├── qa-with-citations.md │ ├── react_agent.md │ ├── read-pdfs.md │ ├── receipt-digitization.md │ ├── simtom.md │ └── structured_generation_workflow.md ├── index.md ├── installation.md ├── licence.md ├── logos │ ├── amazon.png │ ├── apple.png │ ├── best_buy.png │ ├── canoe.png │ ├── cisco.png │ ├── dassault_systems.png │ ├── databricks.png │ ├── datadog.png │ ├── dbt_labs.png │ ├── gladia.jpg │ ├── harvard.png │ ├── hf.png │ ├── johns_hopkins.png │ ├── meta.png │ ├── mit.png │ ├── mount_sinai.png │ ├── nvidia.png │ ├── nyu.png │ ├── safran.png │ ├── salesforce.png │ ├── shopify.png │ ├── smithsonian.png │ ├── tinder.png │ └── upenn.png ├── overrides │ ├── home.html │ └── main.html ├── quickstart.md ├── reference │ ├── chat_templating.md │ ├── generation │ │ ├── cfg.md │ │ ├── choices.md │ │ ├── creating_grammars.md │ │ ├── custom_fsm_ops.md │ │ ├── format.md │ │ ├── generation.md │ │ ├── json.md │ │ ├── regex.md │ │ ├── structured_generation_explanation.md │ │ └── types.md │ ├── index.md │ ├── models │ │ ├── exllamav2.md │ │ ├── llamacpp.md │ │ ├── mlxlm.md │ │ ├── models.md │ │ ├── openai.md │ │ ├── tgi.md │ │ ├── transformers.md │ │ ├── transformers_vision.md │ │ └── vllm.md │ ├── prompting.md │ ├── regex_dsl.md │ ├── samplers.md │ ├── serve │ │ ├── lmstudio.md │ │ └── vllm.md │ └── text.md ├── stylesheets │ └── extra.css └── welcome.md ├── environment.yml ├── examples ├── babyagi.py ├── beam-cloud │ ├── README.md │ └── app.py ├── bentoml │ ├── .bentoignore │ ├── 
bentofile.yaml │ ├── import_model.py │ ├── requirements.txt │ └── service.py ├── cerebrium │ ├── cerebrium.toml │ └── main.py ├── cfg.py ├── dating_profile.py ├── llamacpp_example.py ├── llamacpp_processor.py ├── math_generate_code.py ├── meta_prompting.py ├── modal_example.py ├── parsing.py ├── pick_odd_one_out.py ├── react.py ├── sampling.ipynb ├── self_consistency.py ├── simulation_based_inference.ipynb ├── transformers_integration.py └── vllm_integration.py ├── flake.lock ├── flake.nix ├── mkdocs.yml ├── outlines ├── __init__.py ├── base.py ├── caching.py ├── fsm │ ├── __init__.py │ ├── guide.py │ ├── json_schema.py │ ├── parsing.py │ └── types.py ├── function.py ├── generate │ ├── __init__.py │ ├── api.py │ ├── cfg.py │ ├── choice.py │ ├── format.py │ ├── fsm.py │ ├── generator.py │ ├── json.py │ ├── regex.py │ └── text.py ├── grammars.py ├── grammars │ ├── arithmetic.lark │ ├── common.lark │ └── json.lark ├── models │ ├── __init__.py │ ├── exllamav2.py │ ├── llamacpp.py │ ├── mlxlm.py │ ├── openai.py │ ├── tokenizer.py │ ├── transformers.py │ ├── transformers_vision.py │ └── vllm.py ├── processors │ ├── __init__.py │ ├── base_logits_processor.py │ └── structured.py ├── py.typed ├── samplers.py ├── serve │ ├── __init__.py │ └── serve.py ├── templates.py └── types │ ├── __init__.py │ ├── airports.py │ ├── countries.py │ ├── dsl.py │ └── locale │ ├── __init__.py │ └── us.py ├── pyproject.toml ├── requirements-doc.txt ├── setup.cfg ├── shell.nix ├── tests ├── __init__.py ├── cfg_samples │ ├── arithmetic │ │ ├── lots_of_ops.arithmetic.test │ │ └── simple_math.arithmetic.test │ └── json │ │ ├── outlines.generate.samplers.mypy.json.test │ │ ├── simple_fruit.json.test │ │ └── simple_fruit_no_indent.json.test ├── conftest.py ├── fsm │ ├── partial_python.lark │ ├── test_cfg_guide.py │ ├── test_guide.py │ ├── test_json_schema.py │ ├── test_parsing.py │ └── test_types.py ├── generate │ ├── __init__.py │ ├── conftest.py │ ├── test_api.py │ ├── test_generate.py │ ├── test_generator.py │ ├── test_integration_exllamav2.py │ ├── test_integration_llamacpp.py │ ├── test_integration_transformers.py │ ├── test_integration_transformers_vision.py │ └── test_integration_vllm.py ├── models │ ├── test_mlxlm.py │ ├── test_openai.py │ ├── test_tokenizer.py │ └── test_transformers.py ├── processors │ └── test_base_processor.py ├── test_base.py ├── test_cache.py ├── test_function.py ├── test_grammars.py ├── test_samplers.py ├── test_templates.py └── types │ ├── test_custom_types.py │ ├── test_dsl.py │ └── test_to_regex.py └── uv.lock /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dottxt-ai", 3 | "image": "mcr.microsoft.com/devcontainers/python:3.12", 4 | "runArgs": [ 5 | "--device=nvidia.com/gpu=all" 6 | ], 7 | "hostRequirements": { 8 | "gpu": "optional" 9 | }, 10 | "features": { 11 | "ghcr.io/devcontainers/features/conda:1": {}, 12 | "ghcr.io/devcontainers/features/nvidia-cuda:1": { 13 | "installCudnn": true, 14 | "installToolkit": true, 15 | "cudaVersion": "12.4" 16 | }, 17 | "ghcr.io/devcontainers/features/rust:1": {} 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*] 7 | indent_style = space 8 | indent_size = 4 9 | end_of_line = lf 10 | charset = utf-8 11 | 
trim_trailing_whitespace = true 12 | insert_final_newline = true 13 | 14 | [*.yaml] 15 | indent_size = 2 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | # Issue template inspired by NumPy's excellent template: 2 | # https://github.com/numpy/numpy/edit/main/.github/ISSUE_TEMPLATE/bug-report.yml 3 | name: 🐞 Bug report 4 | description: Create a bug report to help us reproduce and fix it. 5 | title: "" 6 | labels: ["bug"] 7 | 8 | body: 9 | - type: markdown 10 | attributes: 11 | value: >- 12 | Thank you for taking the time to file a bug report. First, carefully read 13 | the following before everything else: 14 | 15 | - Does your issue only arise in a library that uses Outlines? If so, 16 | submit your issue to this library's issue tracker. 17 | - Did you check the issue tracker for open and closed issues that may be 18 | related to your bug? 19 | 20 | - type: textarea 21 | attributes: 22 | label: "Describe the issue as clearly as possible:" 23 | validations: 24 | required: true 25 | 26 | - type: textarea 27 | attributes: 28 | label: "Steps/code to reproduce the bug:" 29 | description: > 30 | A short code example that reproduces the problem/missing feature. It 31 | should be self-contained, i.e., can be copy-pasted into the Python 32 | interpreter or run as-is via `python myproblem.py`. 33 | placeholder: | 34 | import outlines 35 | 36 | << your code here >> 37 | render: python 38 | validations: 39 | required: true 40 | 41 | - type: textarea 42 | attributes: 43 | label: "Expected result:" 44 | description: > 45 | Please describe what you expect the above example to output. 46 | placeholder: | 47 | << the expected result here >> 48 | render: shell 49 | validations: 50 | required: true 51 | 52 | - type: textarea 53 | attributes: 54 | label: "Error message:" 55 | description: > 56 | Please include the full error message, if any. 57 | placeholder: | 58 | << Full traceback starting from `Traceback: ...` >> 59 | render: shell 60 | 61 | - type: textarea 62 | attributes: 63 | label: "Outlines/Python version information:" 64 | description: | 65 | Please run the following code and paste the output here. 66 | python -c "from outlines import _version; print(_version.__version__)"; 67 | python -c "import sys; print('Python', sys.version)"; 68 | pip freeze; 69 | value: | 70 | Version information 71 |
72 | ``` 73 | (command output here) 74 | ``` 75 |
76 | validations: 77 | required: true 78 | 79 | - type: textarea 80 | attributes: 81 | label: "Context for the issue:" 82 | description: | 83 | Please explain how this issue affects your work or why it should be prioritized. 84 | placeholder: | 85 | << your explanation here >> 86 | validations: 87 | required: false 88 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | contact_links: 2 | - name: 🤔 Questions & Help 3 | url: https://github.com/dottxt-ai/outlines/discussions/new 4 | about: "If you have a question about how to use Outlines, please start a discussion." 5 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # 🚧 Thank you for opening a PR! 2 | 3 | A few important guidelines and requirements before we can merge your PR: 4 | 5 | - [ ] We should be able to understand what the PR does from its title only; 6 | - [ ] There is a high-level description of the changes; 7 | - [ ] *If I add a new feature*, there is an [issue][issues] discussing it already; 8 | - [ ] There are links to *all* the relevant issues, discussions and PRs; 9 | - [ ] The branch is rebased on the latest `main` commit; 10 | - [ ] **Commit messages** follow these [guidelines][git-guidelines]; 11 | - [ ] One commit per logical change; 12 | - [ ] The code respects the current **naming conventions**; 13 | - [ ] Docstrings follow the [numpy style guide][docstring-guidelines]; 14 | - [ ] `pre-commit` is installed and configured on your machine, and you ran it before opening the PR; 15 | - [ ] There are tests covering the changes; 16 | - [ ] The documentation is up-to-date; 17 | 18 | Consider opening a **Draft PR** if your work is still in progress but you would 19 | like some feedback from other contributors. 20 | 21 | [issues]: https://github.com/dottxt-ai/outlines/issues 22 | [git-guidelines]: https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html 23 | [docstring-guidelines]: https://numpydoc.readthedocs.io/en/latest/format.html 24 | -------------------------------------------------------------------------------- /.github/scripts/build_sdist_and_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Build sdist and wheel 4 | python -m pip install -U pip 5 | python -m pip install build 6 | python -m build 7 | 8 | # Check sdist install and imports 9 | mkdir -p test-sdist 10 | cd test-sdist 11 | python -m venv venv-sdist 12 | venv-sdist/bin/python -m pip install ../dist/outlines-*.tar.gz 13 | venv-sdist/bin/python -c "import outlines" 14 | cd .. 15 | 16 | # Check wheel install and imports 17 | mkdir -p test-wheel 18 | cd test-wheel 19 | python -m venv venv-wheel 20 | venv-wheel/bin/python -m pip install ../dist/outlines-*.whl 21 | venv-wheel/bin/python -c "import outlines" 22 | cd .. 
23 | -------------------------------------------------------------------------------- /.github/workflows/asv_benchmark_pr.yml: -------------------------------------------------------------------------------- 1 | name: Benchmark PR 2 | 3 | on: 4 | push: 5 | pull_request: 6 | types: [synchronize, labeled] 7 | workflow_dispatch: 8 | env: 9 | PYTHON_VERSION: "3.10" 10 | WORKING_DIR: ${{ github.workspace }}/benchmarks 11 | BENCHMARKS_OUTPUT: ${{ github.workspace }}/benchmarks_output 12 | 13 | permissions: 14 | contents: read 15 | 16 | # Cancels all previous workflow runs for pull requests that have not completed. 17 | concurrency: 18 | # The concurrency group contains the workflow name and the branch name for pull requests 19 | # or the commit hash for any other events. 20 | group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }} 21 | cancel-in-progress: true 22 | 23 | jobs: 24 | benchmark-pr: 25 | runs-on: ubuntu-latest 26 | if: ${{ contains(github.event.pull_request.labels.*.name, 'run-benchmarks') || github.ref == 'refs/heads/main' }} 27 | 28 | defaults: 29 | run: 30 | working-directory: ${{ env.WORKING_DIR }} 31 | 32 | steps: 33 | 34 | - name: Checkout repository 35 | uses: actions/checkout@v3 36 | with: 37 | fetch-depth: 0 38 | 39 | - name: Set up Python 40 | uses: actions/setup-python@v4 41 | with: 42 | python-version: ${{ env.PYTHON_VERSION }} 43 | 44 | - name: Install dependencies 45 | run: | 46 | python -m pip install --upgrade pip 47 | pip install asv virtualenv lf-asv-formatter 48 | 49 | - name: Create ASV machine config file 50 | run: asv machine --machine gh-runner --yes 51 | 52 | - name: Run Benchmarks - `PR HEAD` vs `main` 53 | run: | 54 | # prepare main branch for comparison 55 | git remote add upstream https://github.com/${{ github.repository }}.git 56 | git fetch upstream main 57 | 58 | # Run benchmarks, allow errors, they will be caught in the next step 59 | asv continuous upstream/main HEAD \ 60 | --no-stats --interleave-rounds -a repeat=3 || true 61 | 62 | - name: BENCHMARK RESULTS 63 | run: | 64 | asv compare --factor=1.1 --no-stats --split upstream/main HEAD | tee ${{ env.BENCHMARKS_OUTPUT }} 65 | if grep -q "Benchmarks that have got worse" "${{ env.BENCHMARKS_OUTPUT }}"; then 66 | echo "Performance degradation detected!" 
67 | exit 1 68 | fi 69 | -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build the documentation 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | workflow_dispatch: 7 | 8 | jobs: 9 | build: 10 | name: Build 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions/setup-python@v4 15 | with: 16 | python-version: "3.10" 17 | 18 | - name: Build the documentation 19 | env: 20 | GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }} 21 | run: | 22 | pip install -r requirements-doc.txt 23 | mkdocs build 24 | -------------------------------------------------------------------------------- /.github/workflows/publish_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Publish the documentation 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | release: 9 | types: 10 | - created 11 | 12 | permissions: 13 | contents: write 14 | 15 | jobs: 16 | deploy: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 22 | - uses: actions/setup-python@v4 23 | with: 24 | python-version: 3.x 25 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 26 | - uses: actions/cache@v3 27 | with: 28 | key: mkdocs-material-${{ env.cache_id }} 29 | path: .cache 30 | restore-keys: | 31 | mkdocs-material- 32 | - run: pip install -r requirements-doc.txt 33 | - run: mkdocs build 34 | 35 | - name: Set up Git 36 | run: | 37 | git config user.name ${{ github.actor }} 38 | git config user.email ${{ github.actor }}@users.noreply.github.com 39 | 40 | - name: Publish Tag as latest 41 | env: 42 | GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }} 43 | if: github.event_name == 'release' 44 | run: | 45 | mike deploy --push --update-aliases ${{ github.ref_name }} latest 46 | mike set-default --push latest 47 | 48 | - name: Publish main as unstable 49 | env: 50 | GOOGLE_ANALYTICS_KEY: ${{ secrets.GOOGLE_ANALYTICS_KEY }} 51 | if: github.event_name == 'push' 52 | run: | 53 | mike deploy --push --update-aliases ${{ github.ref_name }} unstable 54 | -------------------------------------------------------------------------------- /.github/workflows/release_docker.yml: -------------------------------------------------------------------------------- 1 | name: Release Docker 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | workflow_dispatch: 8 | inputs: 9 | release_tag: 10 | description: 'Release Tag (for manual dispatch)' 11 | required: false 12 | default: 'latest' 13 | jobs: 14 | release-job: 15 | name: Build and publish on Docker Hub 16 | runs-on: ubuntu-latest 17 | environment: release 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v4 21 | - name: Log in to Docker Hub 22 | uses: docker/login-action@v3 23 | with: 24 | username: ${{ secrets.DOCKERHUB_USERNAME }} 25 | password: ${{ secrets.DOCKERHUB_TOKEN }} 26 | - name: Build and push Docker image 27 | uses: docker/build-push-action@v5 28 | with: 29 | push: true 30 | tags: | 31 | outlinesdev/outlines:latest 32 | outlinesdev/outlines:${{ github.event.release.tag_name || github.event.inputs.release_tag }} 33 | build-args: | 34 | BUILDKIT_CONTEXT_KEEP_GIT_DIR=true 35 | - name: Clean docker cache 36 | run: docker system prune --all --force 37 | -------------------------------------------------------------------------------- 
/.github/workflows/release_pypi.yaml: -------------------------------------------------------------------------------- 1 | name: Release PyPi 2 | 3 | on: 4 | release: 5 | types: 6 | - created 7 | jobs: 8 | release-job: 9 | name: Build and publish on PyPi 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v2 14 | - name: Set up Python 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: "3.10" 18 | - name: Build SDist and Wheel 19 | run: ./.github/scripts/build_sdist_and_wheel.sh 20 | - name: Check that the package version matches the Release name 21 | run: | 22 | grep -Rq "^Version: ${GITHUB_REF:10}$" outlines.egg-info/PKG-INFO 23 | - name: Publish to PyPi 24 | uses: pypa/gh-action-pypi-publish@v1.4.2 25 | with: 26 | user: __token__ 27 | password: ${{ secrets.PYPI_TOKEN }} 28 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | push: 7 | branches: [main] 8 | 9 | jobs: 10 | style: 11 | name: Check the code style 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - uses: actions/setup-python@v4 16 | with: 17 | python-version: "3.10" 18 | - uses: pre-commit/action@v3.0.0 19 | 20 | tests: 21 | name: Run the tests 22 | runs-on: ubuntu-latest 23 | strategy: 24 | matrix: 25 | python-version: ["3.10"] 26 | steps: 27 | - uses: actions/checkout@v3 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v4 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | - name: Set up test environment 33 | run: | 34 | python -m pip install --upgrade pip 35 | pip install uv 36 | uv venv 37 | uv pip install -e .[test] 38 | - name: Create matrix id 39 | id: matrix-id 40 | env: 41 | MATRIX_CONTEXT: ${{ toJson(matrix) }} 42 | run: | 43 | echo $MATRIX_CONTEXT 44 | export MATRIX_ID=`echo $MATRIX_CONTEXT | md5sum | cut -c 1-32` 45 | echo $MATRIX_ID 46 | echo "::set-output name=id::$MATRIX_ID" 47 | - name: Run tests 48 | run: | 49 | uv run pytest -x --cov=outlines 50 | env: 51 | COVERAGE_FILE: .coverage.${{ steps.matrix-id.outputs.id }} 52 | - name: Upload coverage data 53 | uses: actions/upload-artifact@v4 54 | with: 55 | name: coverage-data 56 | path: .coverage.* 57 | if-no-files-found: ignore 58 | include-hidden-files: true 59 | # TODO FIXME: This is only using the last run 60 | overwrite: true 61 | 62 | coverage: 63 | name: Combine & check coverage. 64 | needs: tests 65 | runs-on: ubuntu-latest 66 | 67 | steps: 68 | - uses: actions/checkout@v3 69 | with: 70 | fetch-depth: 0 71 | 72 | - uses: actions/setup-python@v4 73 | with: 74 | cache: pip 75 | python-version: "3.11" 76 | 77 | - name: Set up environment 78 | run: | 79 | pip install --upgrade "coverage[toml]>=5.1" diff-cover 80 | 81 | - uses: actions/download-artifact@v4 82 | with: 83 | name: coverage-data 84 | 85 | - name: Fetch main for coverage diff 86 | run: | 87 | git fetch --no-tags --prune origin main 88 | 89 | - name: Combine coverage & fail if it's <100%. 90 | run: | 91 | python -m coverage combine 92 | python -m coverage html --skip-covered --skip-empty 93 | python -m coverage xml 94 | diff-cover coverage.xml --markdown-report=coverage.md --fail-under=100 || (cat coverage.md >> $GITHUB_STEP_SUMMARY && exit 1) 95 | 96 | - name: Upload HTML report if check failed. 
97 | uses: actions/upload-artifact@v4 98 | with: 99 | name: html-report 100 | path: htmlcov 101 | # TODO FIXME: This is only using the last run 102 | overwrite: true 103 | if: ${{ failure() }} 104 | 105 | build-wheel: 106 | name: Build Wheel and Test SDist 107 | runs-on: ubuntu-latest 108 | steps: 109 | - uses: actions/checkout@v3 110 | - name: Build SDist and Wheel 111 | run: ./.github/scripts/build_sdist_and_wheel.sh 112 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .benchmarks 3 | .cache 4 | .coverage 5 | .direnv 6 | .env 7 | .idea 8 | .pytest_cache 9 | .python-version 10 | .venv 11 | *_version.py 12 | *.egg-info 13 | *.gguf 14 | benchmarks/results 15 | build 16 | docs/build 17 | logs 18 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v5.0.0 4 | hooks: 5 | - id: check-merge-conflict 6 | - id: debug-statements 7 | - id: end-of-file-fixer 8 | - id: trailing-whitespace 9 | - repo: https://github.com/pre-commit/mirrors-mypy 10 | rev: v1.14.1 11 | hooks: 12 | - id: mypy 13 | args: [--allow-redefinition] 14 | exclude: ^examples/ 15 | additional_dependencies: [types-tqdm, types-Pillow] 16 | - repo: https://github.com/astral-sh/ruff-pre-commit 17 | rev: v0.9.1 18 | hooks: 19 | - id: ruff 20 | args: ["--config=pyproject.toml"] 21 | -------------------------------------------------------------------------------- /.pydocstyle: -------------------------------------------------------------------------------- 1 | [pydocstyle] 2 | convention = numpy 3 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | python: 4 | version: "3.8" 5 | install: 6 | - method: pip 7 | path: . 8 | extra_requirements: 9 | - rtd 10 | - requirements: requirements-doc.txt 11 | 12 | sphinx: 13 | builder: html 14 | configuration: docs/source/conf.py 15 | fail_on_warning: true 16 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | "tests" 4 | ], 5 | "python.testing.unittestEnabled": false, 6 | "python.testing.pytestEnabled": true 7 | } 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ### Build stage 2 | FROM python:3.10 AS builder 3 | 4 | WORKDIR /outlines 5 | 6 | RUN pip install --upgrade pip 7 | 8 | # Copy necessary build components 9 | COPY pyproject.toml . 
10 | COPY outlines ./outlines 11 | 12 | # Install outlines and outlines[serve] 13 | # .git required by setuptools-scm 14 | RUN --mount=source=.git,target=.git,type=bind \ 15 | pip install --no-cache-dir .[serve] 16 | 17 | ### Runtime stage 18 | FROM python:3.10 19 | WORKDIR /outlines 20 | COPY --from=builder /outlines /outlines 21 | 22 | # https://dottxt-ai.github.io/outlines/reference/vllm/ 23 | ENTRYPOINT ["python3", "-m", "outlines.serve.serve"] 24 | -------------------------------------------------------------------------------- /benchmarks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/benchmarks/__init__.py -------------------------------------------------------------------------------- /benchmarks/asv.conf.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "project": "Outlines", 4 | "project_url": "https://dottxt-ai.github.io/outlines/", 5 | "repo": "..", 6 | "branches": [ 7 | "HEAD" 8 | ], 9 | "build_command": [ 10 | "python -mpip install .[test]", 11 | "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}", 12 | ], 13 | "environment_type": "virtualenv", 14 | "show_commit_url": "https://github.com/dottxt-ai/outlines/commit/", 15 | "benchmark_dir": ".", 16 | "env_dir": "env", 17 | "results_dir": "results", 18 | "html_dir": "html", 19 | "build_cache_size": 8 20 | } 21 | -------------------------------------------------------------------------------- /benchmarks/bench_cfg_guide.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from transformers import AutoTokenizer 4 | 5 | import outlines.grammars 6 | from outlines.caching import cache_disabled 7 | from outlines.fsm.guide import CFGGuide 8 | from outlines.models.transformers import TransformerTokenizer 9 | 10 | random.seed(42) 11 | 12 | 13 | def get_tiny_tokenizer(): 14 | """1000 tokens in vocabulary""" 15 | return TransformerTokenizer( 16 | AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2") 17 | ) 18 | 19 | 20 | benched_grammars = { 21 | "json": outlines.grammars.json, 22 | "arithmetic": outlines.grammars.arithmetic, 23 | } 24 | 25 | 26 | class CFGGuideBenchmark: 27 | params = benched_grammars.keys() 28 | 29 | def setup(self, grammar_name): 30 | self.tokenizer = get_tiny_tokenizer() 31 | self.prebuilt_cfg_guide = CFGGuide( 32 | benched_grammars[grammar_name], self.tokenizer 33 | ) 34 | 35 | @staticmethod 36 | def _run_random_cfg(guide, rejection_sampling=True): 37 | state = guide.initial_state 38 | token_ids = list(guide.tokenizer.vocabulary.values()) 39 | for i in range(40): 40 | # simulate ordering of logits top prob to lowest prob 41 | random.shuffle(token_ids) 42 | # simulate sampling and state update 43 | if rejection_sampling: 44 | next_token_id = next(guide.iter_valid_token_ids(state, token_ids)) 45 | state = guide.get_next_state(state, next_token_id) 46 | else: 47 | next_token_id = random.choice(guide.get_next_instruction(state).tokens) 48 | state = guide.get_next_state(state, next_token_id) 49 | 50 | @cache_disabled() 51 | def time_cfg_guide_setup(self, grammar_name): 52 | CFGGuide(benched_grammars[grammar_name], self.tokenizer) 53 | 54 | @cache_disabled() 55 | def time_cfg_guide_run_rejection_sampling(self, grammar): 56 | self._run_random_cfg(self.prebuilt_cfg_guide, rejection_sampling=True) 57 | 
58 | @cache_disabled() 59 | def time_cfg_guide_run(self, grammar): 60 | self._run_random_cfg(self.prebuilt_cfg_guide, rejection_sampling=False) 61 | 62 | @cache_disabled() 63 | def peakmem_cfg_guide_run(self, grammar): 64 | self._run_random_cfg(self.prebuilt_cfg_guide) 65 | -------------------------------------------------------------------------------- /benchmarks/bench_json_schema.py: -------------------------------------------------------------------------------- 1 | from outlines_core.fsm.json_schema import build_regex_from_schema 2 | 3 | from outlines.caching import cache_disabled 4 | from outlines.fsm.guide import RegexGuide 5 | 6 | from .common import setup_tokenizer # noqa: E402 7 | 8 | simple_schema = """{ 9 | "$defs": { 10 | "Armor": { 11 | "enum": ["leather", "chainmail", "plate"], 12 | "title": "Armor", 13 | "type": "string" 14 | } 15 | }, 16 | "properties": { 17 | "name": {"maxLength": 10, "title": "Name", "type": "string"}, 18 | "age": {"title": "Age", "type": "integer"}, 19 | "armor": {"$ref": "#/$defs/Armor"}, 20 | "strength": {"title": "Strength", "type": "integer"}\ 21 | }, 22 | "required": ["name", "age", "armor", "strength"], 23 | "title": "Character", 24 | "type": "object" 25 | }""" 26 | 27 | 28 | complex_schema = """{ 29 | "$schema": "http://json-schema.org/draft-04/schema#", 30 | "title": "Schema for a recording", 31 | "type": "object", 32 | "definitions": { 33 | "artist": { 34 | "type": "object", 35 | "properties": { 36 | "id": {"type": "number"}, 37 | "name": {"type": "string"}, 38 | "functions": { 39 | "type": "array", 40 | "items": {"type": "string"} 41 | } 42 | }, 43 | "required": ["id", "name", "functions"] 44 | } 45 | }, 46 | "properties": { 47 | "id": {"type": "number"}, 48 | "work": { 49 | "type": "object", 50 | "properties": { 51 | "id": {"type": "number"}, 52 | "name": {"type": "string"}, 53 | "composer": {"$ref": "#/definitions/artist"} 54 | } 55 | }, 56 | "recording_artists": { 57 | "type": "array", 58 | "items": {"$ref": "#/definitions/artist"} 59 | } 60 | }, 61 | "required": ["id", "work", "recording_artists"] 62 | }""" 63 | 64 | schemas = dict(simple_schema=simple_schema, complex_schema=complex_schema) 65 | 66 | 67 | class JsonSchemaBenchmark: 68 | params = schemas.keys() 69 | 70 | def setup(self, schema_name): 71 | self.tokenizer = setup_tokenizer() 72 | self.schema = schemas[schema_name] 73 | 74 | @cache_disabled() 75 | def time_json_schema_to_fsm(self, schema_name): 76 | regex = build_regex_from_schema(self.schema) 77 | RegexGuide.from_regex(regex, self.tokenizer) 78 | -------------------------------------------------------------------------------- /benchmarks/bench_processors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | import outlines.models as models 5 | from outlines.processors import OutlinesLogitsProcessor, RegexLogitsProcessor 6 | 7 | try: 8 | import mlx.core as mx 9 | except ImportError: 10 | pass 11 | 12 | try: 13 | import jax 14 | import jax.numpy as jnp 15 | except ImportError: 16 | pass 17 | 18 | 19 | def is_mlx_lm_allowed(): 20 | try: 21 | import mlx.core as mx 22 | except ImportError: 23 | return False 24 | return mx.metal.is_available() 25 | 26 | 27 | def is_jax_allowed(): 28 | try: 29 | import jax # noqa: F401 30 | except ImportError: 31 | return False 32 | return True 33 | 34 | 35 | def get_mock_processor_inputs(array_library, num_tokens=30000): 36 | """ 37 | logits: (4, 30,000 ) dtype=float 38 | input_ids shape: (4, 2048) dtype=int 39 | """ 40 
| if array_library.startswith("torch"): 41 | device = array_library.split("_")[1] if "_" in array_library else "cpu" 42 | 43 | logits = torch.rand((4, num_tokens), dtype=torch.float, device=device) 44 | input_ids = torch.randint( 45 | low=0, high=num_tokens, size=(4, 2048), dtype=torch.int, device=device 46 | ) 47 | elif array_library == "numpy": 48 | logits = np.random.rand(4, num_tokens).astype(np.float32) 49 | input_ids = np.random.randint(low=0, high=num_tokens, size=(4, 2048)) 50 | elif array_library == "mlx": 51 | logits = mx.random.uniform( 52 | low=-1e9, high=1e9, shape=(4, num_tokens), dtype=mx.float32 53 | ) 54 | input_ids = mx.random.randint( 55 | low=0, high=num_tokens, shape=(4, 2048), dtype=mx.int32 56 | ) 57 | elif array_library == "jax": 58 | logits = jnp.random.uniform( 59 | key=jax.random.PRNGKey(0), shape=(4, num_tokens), dtype=jnp.float32 60 | ) 61 | input_ids = jnp.random.randint( 62 | key=jax.random.PRNGKey(0), low=0, high=num_tokens, shape=(4, 2048) 63 | ) 64 | else: 65 | raise ValueError 66 | 67 | return logits, input_ids 68 | 69 | 70 | class HalvingLogitsProcessor(OutlinesLogitsProcessor): 71 | """Simply halve the passed logits""" 72 | 73 | def process_logits(self, input_ids, logits): 74 | return logits / 2 75 | 76 | 77 | class LogitsProcessorPassthroughBenchmark: 78 | """ 79 | Benchmark the time it takes to convert between array frameworks 80 | This should be on the order of microseconds 81 | """ 82 | 83 | params = ["torch", "numpy"] 84 | if is_mlx_lm_allowed(): 85 | params += ["mlx"] 86 | if torch.cuda.is_available(): 87 | params += ["torch_cuda"] 88 | if torch.mps.is_available(): 89 | params += ["torch_mps"] 90 | if is_jax_allowed(): 91 | params += ["jax"] 92 | 93 | def setup(self, array_library): 94 | self.logits_processor = HalvingLogitsProcessor() 95 | 96 | self.logits, self.input_ids = get_mock_processor_inputs(array_library) 97 | 98 | def time_passthrough(self, *params): 99 | self.logits_processor(self.input_ids, self.logits) 100 | 101 | 102 | class LogitsProcessorStructuredBenchmark: 103 | """ 104 | Benchmark structured generation mask application for single decoder pass 105 | """ 106 | 107 | array_libraries = ["torch", "numpy"] 108 | if is_mlx_lm_allowed(): 109 | array_libraries += ["mlx"] 110 | if torch.cuda.is_available(): 111 | array_libraries += ["torch_cuda"] 112 | if torch.mps.is_available(): 113 | array_libraries += ["torch_mps"] 114 | 115 | # accept very many or very few tokens, respectively 116 | patterns = [r"[^Z]*", "Z*"] 117 | 118 | params = [array_libraries, patterns] 119 | param_names = ["array_library, pattern"] 120 | 121 | def setup(self, array_library, pattern): 122 | tokenizer = models.transformers("facebook/opt-125m", device="cpu").tokenizer 123 | 124 | self.logits_processor = RegexLogitsProcessor(pattern, tokenizer) 125 | 126 | self.logits, self.input_ids = get_mock_processor_inputs( 127 | array_library, len(tokenizer.vocabulary) 128 | ) 129 | 130 | def time_structured_generation(self, array_library, pattern): 131 | self.logits_processor(self.input_ids, self.logits) 132 | -------------------------------------------------------------------------------- /benchmarks/bench_regex_guide.py: -------------------------------------------------------------------------------- 1 | from outlines.caching import cache_disabled 2 | from outlines.fsm.guide import RegexGuide 3 | 4 | from .common import setup_tokenizer 5 | 6 | regex_samples = { 7 | "email": 
r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?", 8 | "complex_phone": "\\+?\\d{1,4}?[-.\\s]?\\(?\\d{1,3}?\\)?[-.\\s]?\\d{1,4}[-.\\s]?\\d{1,4}[-.\\s]?\\d{1,9}", 9 | "simple_phone": "\\+?[1-9][0-9]{7,14}", 10 | "date": r"([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])(\.|-|/)([1-9]|0[1-9]|1[0-2])(\.|-|/)([0-9][0-9]|19[0-9][0-9]|20[0-9][0-9])|([0-9][0-9]|19[0-9][0-9]|20[0-9][0-9])(\.|-|/)([1-9]|0[1-9]|1[0-2])(\.|-|/)([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])", 11 | "time": r"(0?[1-9]|1[0-2]):[0-5]\d\s?(am|pm)?", 12 | "ip": r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)", 13 | "url": r"(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?", 14 | "ssn": r"\d{3}-\d{2}-\d{4}", 15 | "complex_span_constrained_relation_extraction": "(['\"\\ ,]?((?:of|resulting|case|which|cultures|a|core|extreme|selflessness|spiritual|various|However|both|vary|in|other|secular|the|religious|among|moral|and|It|object|worldviews|altruism|traditional|material|aspect|or|life|beings|virtue|is|however|opposite|concern|an|practice|it|for|s|quality|religions|In|Altruism|animals|happiness|many|become|principle|human|selfishness|may|synonym)['\"\\ ,]?)+['\"\\ ,]?\\s\\|\\s([^|\\(\\)\n]{1,})\\s\\|\\s['\"\\ ,]?((?:of|resulting|case|which|cultures|a|core|extreme|selflessness|spiritual|various|However|both|vary|in|other|secular|the|religious|among|moral|and|It|object|worldviews|altruism|traditional|material|aspect|or|life|beings|virtue|is|however|opposite|concern|an|practice|it|for|s|quality|religions|In|Altruism|animals|happiness|many|become|principle|human|selfishness|may|synonym)['\"\\ ,]?)+['\"\\ ,]?(\\s\\|\\s\\(([^|\\(\\)\n]{1,})\\s\\|\\s([^|\\(\\)\n]{1,})\\))*\\n)*", 16 | } 17 | 18 | 19 | class RegexGuideBenchmark: 20 | params = regex_samples.keys() 21 | 22 | def setup(self, pattern_name): 23 | self.tokenizer = setup_tokenizer() 24 | self.pattern = regex_samples[pattern_name] 25 | 26 | @cache_disabled() 27 | def time_regex_to_guide(self, pattern_name): 28 | RegexGuide.from_regex(self.pattern, self.tokenizer) 29 | 30 | 31 | class MemoryRegexGuideBenchmark: 32 | params = ["simple_phone", "complex_span_constrained_relation_extraction"] 33 | 34 | def setup(self, pattern_name): 35 | self.tokenizer = setup_tokenizer() 36 | self.pattern = regex_samples[pattern_name] 37 | 38 | @cache_disabled() 39 | def peakmem_regex_to_guide(self, pattern_name): 40 | RegexGuide.from_regex(self.pattern, self.tokenizer) 41 | -------------------------------------------------------------------------------- /benchmarks/common.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer 2 | 3 | from outlines.models.transformers import TransformerTokenizer 4 | 5 | 6 | def setup_tokenizer(): 7 | tokenizer = AutoTokenizer.from_pretrained("gpt2") 8 | return TransformerTokenizer(tokenizer) 9 | -------------------------------------------------------------------------------- /docs/api/guide.md: -------------------------------------------------------------------------------- 1 | ::: outlines.fsm.guide 2 | -------------------------------------------------------------------------------- /docs/api/index.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | -------------------------------------------------------------------------------- /docs/api/json_schema.md: 
-------------------------------------------------------------------------------- 1 | ::: outlines.fsm.json_schema 2 | -------------------------------------------------------------------------------- /docs/api/models.md: -------------------------------------------------------------------------------- 1 | ::: outlines.models 2 | -------------------------------------------------------------------------------- /docs/api/parsing.md: -------------------------------------------------------------------------------- 1 | ::: outlines.fsm.parsing 2 | -------------------------------------------------------------------------------- /docs/api/regex.md: -------------------------------------------------------------------------------- 1 | ::: outlines.generate.regex 2 | -------------------------------------------------------------------------------- /docs/api/samplers.md: -------------------------------------------------------------------------------- 1 | ::: outlines.samplers 2 | -------------------------------------------------------------------------------- /docs/api/templates.md: -------------------------------------------------------------------------------- 1 | ::: outlines.templates 2 | -------------------------------------------------------------------------------- /docs/assets/images/dottxt-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/assets/images/dottxt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/dottxt.png -------------------------------------------------------------------------------- /docs/assets/images/logo-dark-mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/logo-dark-mode.png -------------------------------------------------------------------------------- /docs/assets/images/logo-light-mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/logo-light-mode.png -------------------------------------------------------------------------------- /docs/assets/images/logo-simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/logo-simple.png -------------------------------------------------------------------------------- /docs/assets/images/logo-square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/logo-square.png -------------------------------------------------------------------------------- /docs/assets/images/logo-square.svg: -------------------------------------------------------------------------------- 1 | 2 | 18 | 36 | 39 | 43 | 47 | 51 | 55 | 59 | 63 | 67 | 71 | 75 | 79 | 83 | 87 | 91 | 95 | 99 | 103 | 107 | 111 | 115 | 119 | 120 | 122 | 124 | 131 | 132 | 133 | 134 | 
-------------------------------------------------------------------------------- /docs/assets/images/normal_computing.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/assets/images/normal_computing.jpg -------------------------------------------------------------------------------- /docs/blog/.authors.yml: -------------------------------------------------------------------------------- 1 | authors: 2 | remilouf: 3 | name: Remi Louf 4 | description: author 5 | avatar: https://avatars.githubusercontent.com/u/3885044?v=4 6 | -------------------------------------------------------------------------------- /docs/blog/assets/4000_stars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/blog/assets/4000_stars.png -------------------------------------------------------------------------------- /docs/blog/index.md: -------------------------------------------------------------------------------- 1 | # Blog 2 | -------------------------------------------------------------------------------- /docs/community/belonging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/community/belonging.png -------------------------------------------------------------------------------- /docs/community/examples.md: -------------------------------------------------------------------------------- 1 | # Community projects and articles 2 | 3 | Publishing examples and articles about Outlines is a meaningful way to contribute to the community. Here is a list of projects we are aware of. Drop us a line if we forgot yours! 4 | 5 | [MMSG](https://github.com/leloykun/mmsg) is a Python library for generating interleaved text and image content in a structured format you can directly pass to downstream APIs. 6 | 7 | [Multimodal Structured Generation: CVPR's 2nd MMFM Challenge Technical Report](https://arxiv.org/abs/2406.11403) shows that Structured Generation can outperform finetuning, and maybe even multimodality, in document-image understanding tasks as part of CVPR's 2nd MMFM Challenge. 8 | 9 | [Chess LLM Arena](https://huggingface.co/spaces/mlabonne/chessllm) is a HuggingFace Space where you can make LLMs compete in a chess match. 10 | 11 | [LLM Data Gen](https://huggingface.co/spaces/lhoestq/LLM_DataGen) is a HuggingFace Space that generates synthetic dataset files in JSONLines format. 12 | 13 | [Fast, High-Fidelity LLM Decoding with Regex Constraints](https://vivien000.github.io/blog/journal/llm-decoding-with-regex-constraints.html) presents an efficient alternative to Outlines's structured generation. 14 | 15 | [gigax](https://github.com/GigaxGames/gigax) is an open-source library that allows you to create real-time LLM-powered NPCs for video games. 16 | 17 | [Improving Prompt Consistency with Structured Generations](https://huggingface.co/blog/evaluation-structured-outputs) shows how structured generation can improve consistency of evaluation runs by reducing sensitivity to changes in prompt format. 18 | 19 | [AskNews](https://asknews.app) is a news curation service processing 300k news articles per day in a structured way, with Outlines.
20 | -------------------------------------------------------------------------------- /docs/community/index.md: -------------------------------------------------------------------------------- 1 | # Community 2 | 3 | ![Belonging](belonging.png) 4 | 5 | Outlines exists for a community of users who believe software doesn't need to be complicated, who share the same passion for Large Language Models but don't want to compromise on robustness. Together, we are bringing these powerful models back to the world of software. 6 | 7 | ## Connect on Discord 8 | 9 | The Outlines community lives on our Discord server. There you can ask questions, share ideas or just chat with people like you. Don't be a stranger and [join us][discord]. 10 | 11 | 12 | [discord]: https://discord.gg/UppQmhEpe8 13 | -------------------------------------------------------------------------------- /docs/community/versioning.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Versioning Guide 3 | --- 4 | 5 | # Versioning Guide 6 | 7 | 8 | The Outlines project follows a structured versioning scheme designed to provide clarity and minimize risk for downstream dependents. 9 | 10 | Each part of the version number (`major.minor.patch`) conveys information about the nature and impact of the changes included in the release. 11 | 12 | - **Major Releases** include compatibility-breaking changes to core interfaces, such as `LogitsProcessor`s and `Guides`. 13 | - **Minor Releases** introduce changes of substance to internal or unexposed functionality. These changes are well tested and intended to maintain compatibility with existing use of core interfaces. 14 | - **Patch Releases** address bug fixes and incorporate low-risk changes to improve stability and performance. 15 | 16 | ## Releases 17 | 18 | Releases, along with release notes, can be found on the [Outlines Releases GitHub Page](https://github.com/dottxt-ai/outlines/releases). 19 | 20 | ## Version Pinning Recommendations 21 | 22 | Here are our recommendations for managing dependencies on the Outlines package: 23 | 24 | **Small, Risk-Tolerant Projects:** Pin to a specific major version. 25 | 26 | **Large, Conservative Projects:** Pin to a specific minor version. 27 | -------------------------------------------------------------------------------- /docs/cookbook/classification.md: -------------------------------------------------------------------------------- 1 | # Classification 2 | 3 | Classification is a classic problem in NLP and finds many applications: spam detection, sentiment analysis, triaging of incoming requests, etc. We will use the example of a company that wants to sort support requests between those that require immediate attention (`URGENT`) and those that can wait a little (`STANDARD`). You could easily extend the example by adding new labels. 4 | 5 | 6 | This tutorial shows how one can implement multi-label classification using Outlines. We will use two functionalities of the library: `generate.choice` and `generate.json`. 7 | 8 | As always, we start with initializing the model.
Since we are GPU poor, we will be using a quantized version of Mistral-7B-v0.1: 9 | 10 | ```python 11 | import outlines 12 | 13 | model = outlines.models.transformers("TheBloke/Mistral-7B-OpenOrca-AWQ", device="cuda") 14 | ``` 15 | 16 | We will use the following prompt template: 17 | 18 | ```python 19 | from outlines import Template 20 | 21 | 22 | customer_support = Template.from_string( 23 | """You are an experienced customer success manager. 24 | 25 | Given a request from a client, you need to determine when the 26 | request is urgent using the label "URGENT" or when it can wait 27 | a little with the label "STANDARD". 28 | 29 | # Examples 30 | 31 | Request: "How are you?" 32 | Label: STANDARD 33 | 34 | Request: "I need this fixed immediately!" 35 | Label: URGENT 36 | 37 | # TASK 38 | 39 | Request: {{ request }} 40 | Label: """ 41 | ) 42 | ``` 43 | 44 | ## Choosing between multiple choices 45 | 46 | Outlines provides a shortcut to do multi-label classification, using the `outlines.generate.choice` function to initialize a generator. Outlines uses multinomial sampling by default; here we will use the greedy sampler to get the label with the highest probability: 47 | 48 | ```python 49 | from outlines.samplers import greedy 50 | 51 | generator = outlines.generate.choice(model, ["URGENT", "STANDARD"], sampler=greedy()) 52 | ``` 53 | Outlines supports batched requests, so we will pass two requests to the model: 54 | 55 | ```python 56 | requests = [ 57 | "My hair is on fire! Please help me!!!", 58 | "Just wanted to say hi" 59 | ] 60 | 61 | prompts = [customer_support(request) for request in requests] 62 | ``` 63 | 64 | We can now ask the model to classify the requests: 65 | 66 | ```python 67 | labels = generator(prompts) 68 | print(labels) 69 | # ['URGENT', 'STANDARD'] 70 | ``` 71 | 72 | Now, you might be in a hurry and don't want to wait until the model finishes completion. After all, you only need to see the first letter of the response to know whether the request is urgent or standard. You can instead stream the response: 73 | 74 | ```python 75 | tokens = generator.stream(prompts) 76 | labels = ["URGENT" if "U" in token else "STANDARD" for token in next(tokens)] 77 | print(labels) 78 | # ['URGENT', 'STANDARD'] 79 | ``` 80 | 81 | ## Using JSON-structured generation 82 | 83 | Another (convoluted) way to do multi-label classification is to use JSON-structured generation in Outlines. We first need to define a Pydantic schema that contains the labels: 84 | 85 | ```python 86 | from enum import Enum 87 | from pydantic import BaseModel 88 | 89 | 90 | class Label(str, Enum): 91 | urgent = "URGENT" 92 | standard = "STANDARD" 93 | 94 | 95 | class Classification(BaseModel): 96 | label: Label 97 | ``` 98 | 99 | and we can use `generate.json` by passing the Pydantic model we just defined, and call the generator: 100 | 101 | ```python 102 | generator = outlines.generate.json(model, Classification, sampler=greedy()) 103 | labels = generator(prompts) 104 | print(labels) 105 | # [Classification(label=<Label.urgent: 'URGENT'>), Classification(label=<Label.standard: 'STANDARD'>)] 106 | ``` 107 | -------------------------------------------------------------------------------- /docs/cookbook/deploy-using-cerebrium.md: -------------------------------------------------------------------------------- 1 | # Run Outlines using Cerebrium 2 | 3 | [Cerebrium](https://www.cerebrium.ai/) is a serverless AI infrastructure platform that makes it easier for companies to build and deploy AI-based applications.
They offer serverless GPUs with low cold-start times and over 12 varieties of GPU chips that auto-scale, and you only pay for the compute you use. 4 | 5 | In this guide we will show you how you can use Cerebrium to run programs written with Outlines on GPUs in the cloud. 6 | 7 | # Setup Cerebrium 8 | 9 | First, we install Cerebrium and log in to get authenticated. 10 | 11 | ```bash 12 | pip install cerebrium 13 | cerebrium login 14 | ``` 15 | 16 | Then let us create our first project: 17 | 18 | ```bash 19 | cerebrium init outlines-project 20 | ``` 21 | 22 | ## Setup Environment and Hardware 23 | 24 | You set up your environment and hardware in the `cerebrium.toml` file that was created by the `init` command above. 25 | 26 | ```toml 27 | [cerebrium.deployment] 28 | docker_base_image_url = "nvidia/cuda:12.1.1-runtime-ubuntu22.04" 29 | 30 | [cerebrium.hardware] 31 | cpu = 2 32 | memory = 14.0 33 | gpu = "AMPERE A10" 34 | gpu_count = 1 35 | provider = "aws" 36 | region = "us-east-1" 37 | 38 | [cerebrium.dependencies.pip] 39 | outlines = "==0.0.37" 40 | transformers = "==4.38.2" 41 | datasets = "==2.18.0" 42 | accelerate = "==0.27.2" 43 | ``` 44 | 45 | ## Setup inference 46 | 47 | Running code in Cerebrium is like writing normal Python with no special syntax. In a `main.py` file, specify the following: 48 | 49 | ```python 50 | import outlines 51 | 52 | 53 | model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct") 54 | 55 | schema = """{ 56 | "title": "Character", 57 | "type": "object", 58 | "properties": { 59 | "name": { 60 | "title": "Name", 61 | "maxLength": 10, 62 | "type": "string" 63 | }, 64 | "age": { 65 | "title": "Age", 66 | "type": "integer" 67 | }, 68 | "armor": {"$ref": "#/definitions/Armor"}, 69 | "weapon": {"$ref": "#/definitions/Weapon"}, 70 | "strength": { 71 | "title": "Strength", 72 | "type": "integer" 73 | } 74 | }, 75 | "required": ["name", "age", "armor", "weapon", "strength"], 76 | "definitions": { 77 | "Armor": { 78 | "title": "Armor", 79 | "description": "An enumeration.", 80 | "enum": ["leather", "chainmail", "plate"], 81 | "type": "string" 82 | }, 83 | "Weapon": { 84 | "title": "Weapon", 85 | "description": "An enumeration.", 86 | "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], 87 | "type": "string" 88 | } 89 | } 90 | }""" 91 | 92 | generator = outlines.generate.json(model, schema) 93 | ``` 94 | 95 | On first deploy, it will download the model and store it on disk; for subsequent calls it will load the model from disk. 96 | 97 | Every function in Cerebrium is callable through an API endpoint. Code at the top-most layer (i.e., not in a function) is instantiated only when the container is spun up the first time, so subsequent calls will simply run the code defined in the function you call. 98 | 99 | To deploy an API that creates a new character when called with a prompt, you can add the following code to `main.py`: 100 | 101 | ```python 102 | def generate( 103 | prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", 104 | ): 105 | 106 | character = generator( 107 | f"[INST]Give me a character description. Describe {prompt}.[/INST]" 108 | ) 109 | 110 | return character 111 | ``` 112 | 113 | 114 | ## Run on the cloud 115 | 116 | ```bash 117 | cerebrium deploy 118 | ``` 119 | 120 | You will see your application deploy, install pip packages, and download the model. Once completed, it will output a cURL request you can use to call your endpoint.
Just remember to end 121 | the URL with the function you would like to call - in this case /generate. You should see your response returned! 122 | -------------------------------------------------------------------------------- /docs/cookbook/extract_event_details.md: -------------------------------------------------------------------------------- 1 | This recipe demonstrates how to use the `outlines` library to extract structured event details from a text message. 2 | We will extract the title, location, and start date and time from messages like the following: 3 | 4 | ```plaintext 5 | Hello Kitty, my grandmother will be here, I think it's better to postpone 6 | our appointment to review math lessons to next Friday at 2pm at the same 7 | place, 3 avenue des tanneurs, one hour will be enough see you 😘 8 | ``` 9 | 10 | Let's see how to extract the event details from the message with the MLX 11 | library dedicated to Apple Silicon processors (M series). 12 | 13 | ```python 14 | --8<-- "docs/cookbook/extract_event_details.py" 15 | ``` 16 | 17 | The output will be: 18 | 19 | ```plaintext 20 | Today: Saturday 16 November 2024 and it's 10:55 21 | ``` 22 | 23 | and the extracted event information will be: 24 | 25 | ```json 26 | { 27 | "title":"Math Review", 28 | "location":"3 avenue des tanneurs", 29 | "start":"2024-11-22T14:00:00Z" 30 | } 31 | ``` 32 | 33 | 34 | To find out more about this use case, we recommend the [ICS Generator](https://github.com/jrudoler/ics-generator), a project developed by [Joseph Rudoler](https://x.com/JRudoler) 35 | -------------------------------------------------------------------------------- /docs/cookbook/extract_event_details.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | from outlines import generate, models 6 | 7 | # Load the model 8 | model = models.mlxlm("mlx-community/Hermes-3-Llama-3.1-8B-8bit") 9 | 10 | 11 | # Define the event schema using Pydantic 12 | class Event(BaseModel): 13 | title: str = Field(description="title of the event") 14 | location: str 15 | start: datetime = Field( 16 | default=None, description="date of the event if available in iso format" 17 | ) 18 | 19 | 20 | # Get the current date and time 21 | now = datetime.now().strftime("%A %d %B %Y and it's %H:%M") 22 | 23 | # Define the prompt 24 | prompt = f""" 25 | Today's date and time are {now} 26 | Given a user message, extract information of the event like date and time in iso format, location and title. 27 | If the given date is relative, think step by step to find the right date.
 28 | Here is the message:
 29 | """
 30 | 
 31 | # Sample message
 32 | message = """Hello Kitty, my grandmother will be here, I think it's better to postpone our
 33 | appointment to review math lessons to next Friday at 2pm at the same place, 3 avenue des tanneurs, I think that one hour will be enough
 34 | see you 😘 """
 35 | 
 36 | # Create the generator
 37 | generator = generate.json(model, Event)
 38 | 
 39 | # Extract the event information
 40 | event = generator(prompt + message)
 41 | 
 42 | # Print the current date and time
 43 | print(f"Today: {now}")
 44 | 
 45 | # Print the extracted event information in JSON format
 46 | print(event.json())
 47 | 
-------------------------------------------------------------------------------- /docs/cookbook/extraction.md: --------------------------------------------------------------------------------
 1 | # Named entity extraction
 2 | 
 3 | Named Entity Extraction is a fundamental problem in NLP. It involves identifying and categorizing named entities within a document: people, organizations, dates, places, etc. It is usually the first step in a more complex NLP workflow. Here we will use the example of a pizza restaurant that receives orders via its website and needs to identify the number and types of pizzas that are being ordered.
 4 | 
 5 | Getting LLMs to output the extracted entities in a structured format can be challenging. In this tutorial we will see how we can use Outlines' JSON-structured generation to extract entities from a document and return them in a valid JSON data structure 100% of the time.
 6 | 
 7 | As always, we start by initializing the model. We will be using a quantized version of Mistral-7B-v0.1 (we're GPU poor):
 8 | 
 9 | ```python
 10 | import outlines
 11 | 
 12 | model = outlines.models.transformers("TheBloke/Mistral-7B-OpenOrca-AWQ", device="cuda")
 13 | ```
 14 | 
 15 | And we will be using the following prompt template:
 16 | 
 17 | ```python
 18 | from outlines import Template
 19 | 
 20 | 
 21 | take_order = Template.from_string(
 22 |     """You are the owner of a pizza parlor. Customers \
 23 | send you orders from which you need to extract:
 24 | 
 25 | 1. The pizza that is ordered
 26 | 2. The number of pizzas
 27 | 
 28 | # EXAMPLE
 29 | 
 30 | ORDER: I would like one Margherita pizza
 31 | RESULT: {"pizza": "Margherita", "number": 1}
 32 | 
 33 | # OUTPUT INSTRUCTIONS
 34 | 
 35 | Answer in valid JSON. Here are the different objects relevant for the output:
 36 | 
 37 | Order:
 38 |     pizza (str): name of the pizza
 39 |     number (int): number of pizzas
 40 | 
 41 | Return a valid JSON of type "Order"
 42 | 
 43 | # OUTPUT
 44 | 
 45 | ORDER: {{ order }}
 46 | RESULT: """
 47 | )
 48 | ```
 49 | 
 50 | We now define our data model using Pydantic:
 51 | 
 52 | ```python
 53 | from enum import Enum
 54 | from pydantic import BaseModel
 55 | 
 56 | class Pizza(str, Enum):
 57 |     margherita = "Margherita"
 58 |     pepperoni = "Pepperoni"
 59 |     calzone = "Calzone"
 60 | 
 61 | class Order(BaseModel):
 62 |     pizza: Pizza
 63 |     number: int
 64 | ```
 65 | 
 66 | We can now define our generator and call it on several incoming orders:
 67 | 
 68 | ```python
 69 | orders = [
 70 |     "Hi! I would like to order two pepperonni pizzas and would like them in 30mins.",
 71 |     "Is it possible to get 12 margheritas?"
72 | ] 73 | prompts = [take_order(order) for order in orders] 74 | 75 | generator = outlines.generate.json(model, Order) 76 | 77 | results = generator(prompts) 78 | print(results) 79 | # [Order(pizza=, number=2), 80 | # Order(pizza=, number=12)] 81 | ``` 82 | 83 | There are several ways you could improve this example: 84 | 85 | - Clients may order several types of pizzas. 86 | - Clients may order drinks as well. 87 | - If the pizza place has a delivery service we need to extract the client's address and phone number 88 | - Clients may specify the time for which they want the pizza. We could then check against a queuing system and reply to them with the estimated delivery time. 89 | 90 | How would you change the Pydantic model to account for these use cases? 91 | -------------------------------------------------------------------------------- /docs/cookbook/images/chain_of_density.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/chain_of_density.png -------------------------------------------------------------------------------- /docs/cookbook/images/coding_structure_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/coding_structure_diagram.png -------------------------------------------------------------------------------- /docs/cookbook/images/knowledge-graph-extraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/knowledge-graph-extraction.png -------------------------------------------------------------------------------- /docs/cookbook/images/nvidia-income.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/nvidia-income.png -------------------------------------------------------------------------------- /docs/cookbook/images/simtom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/simtom.png -------------------------------------------------------------------------------- /docs/cookbook/images/trader-joes-receipt.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/cookbook/images/trader-joes-receipt.jpg -------------------------------------------------------------------------------- /docs/cookbook/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This part of the documentation provides a few cookbooks that you can browse to get acquainted with the library and get some inspiration about what you could do with structured generation. Remember that you can easily change the model that is being used! 4 | 5 | - [Classification](classification.md): Classify customer requests. 6 | - [Named Entity Extraction](extraction.md): Extract information from pizza orders. 
 7 | - [Dating Profile](dating_profiles.md): Build dating profiles from descriptions using prompt templating and JSON-structured generation.
 8 | - [Chain Of Density](chain_of_density.md): Summarize documents using chain of density prompting and JSON-structured generation.
 9 | - [Playing Chess](models_playing_chess.md): Make Phi-3 Mini play chess against itself using regex-structured generation.
 10 | - [SimToM](simtom.md): Improve LLMs' Theory of Mind capabilities with perspective-taking prompting and JSON-structured generation.
 11 | - [Q&A with Citations](qa-with-citations.md): Answer questions and provide citations using JSON-structured generation.
 12 | - [Knowledge Graph Generation](knowledge_graph_extraction.md): Generate a Knowledge Graph from unstructured text using JSON-structured generation.
 13 | - [Chain Of Thought (CoT)](chain_of_thought.md): Generate a series of intermediate reasoning steps using regex-structured generation.
 14 | - [ReAct Agent](react_agent.md): Build an agent with open weights models using regex-structured generation.
 15 | - [Earnings reports to CSV](earnings-reports.md): Extract data from earnings reports to CSV using regex-structured generation.
 16 | - [Vision-Language Models](atomic_caption.md): Use Outlines with vision-language models for tasks like image captioning and visual reasoning.
 17 | - [Receipt Digitization](receipt-digitization.md): Extract information from a picture of a receipt using structured generation.
 18 | - [Structured Generation from PDFs](read-pdfs.md): Use Outlines with vision-language models to read PDFs and produce structured output.
 19 | 
-------------------------------------------------------------------------------- /docs/cookbook/models_playing_chess.md: --------------------------------------------------------------------------------
 1 | # Large language models playing chess
 2 | 
 3 | In this example we will make a Phi-2 model play chess against itself. On its own the model easily generates invalid moves, so we will give it a little help. At each step we will generate a regex that only matches valid moves, and use it to help the model generate only valid moves.
 4 | 
 5 | ## The chessboard
 6 | 
 7 | The game will be played on a standard chessboard. We will use the `chess` [library](https://github.com/niklasf/python-chess) to track the opponents' moves, and check that the moves are valid.
 8 | 
 9 | ```python
 10 | %pip install outlines -q
 11 | %pip install chess -q
 12 | %pip install transformers accelerate einops -q
 13 | 
 14 | import chess
 15 | 
 16 | board = chess.Board("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1")
 17 | ```
 18 | 
 19 | ## The opponents
 20 | 
 21 | Phi-2 will be playing against itself:
 22 | 
 23 | ```python
 24 | from outlines import models
 25 | 
 26 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 27 | 
 28 | ```
 29 | 
 30 | ## A little help for the language model
 31 | 
 32 | To make sure Phi-2 generates valid chess moves we will use Outlines' regex-structured generation.
We define a function that takes the current state of the board and returns a regex that matches all possible legal moves: 33 | 34 | ```python 35 | import re 36 | 37 | def legal_moves_regex(board): 38 | """Build a regex that only matches valid moves.""" 39 | legal_moves = list(board.legal_moves) 40 | legal_modes_str = [board.san(move) for move in legal_moves] 41 | legal_modes_str = [re.sub(r"[+#]", "", move) for move in legal_modes_str] 42 | regex_pattern = "|".join(re.escape(move) for move in legal_modes_str) 43 | regex_pattern = f"{regex_pattern}" 44 | return regex_pattern 45 | ``` 46 | 47 | ## Prompting the language model 48 | 49 | The prompt corresponds to the current state of the board, so we start with: 50 | 51 | ```python 52 | prompt = "Let's play Chess. Moves: " 53 | 54 | ``` 55 | 56 | We update the prompt at each step so it reflects the state of the board after the previous move. 57 | 58 | ## Let's play 59 | 60 | ```python 61 | from outlines import generate 62 | 63 | board_state = " " 64 | turn_number = 0 65 | while not board.is_game_over(): 66 | regex_pattern = legal_moves_regex(board) 67 | structured = generate.regex(model, regex_pattern)(prompt + board_state) 68 | move = board.parse_san(structured) 69 | 70 | if turn_number % 2 == 0 : # It's White's turn 71 | board_state += board.san(move) + " " 72 | else: 73 | board_state += board.san(move) + " " + str(turn_number) + "." 74 | 75 | turn_number += 1 76 | 77 | board.push(move) 78 | 79 | print(board_state) 80 | ``` 81 | 82 | Interestingly enough, Phi-2 hates capturing. 83 | 84 | ```pgn 85 | e4 e5 1.Nf3 Ne7 3.b4 Nf5 5.Nc3 Ne7 7.Bb5 a6 9.Na4 b6 11.c3 Nec6 13.c4 a5 15.d4 Qg5 17.Nd2 Bb7 19.dxe5 86 | ``` 87 | 88 | *This example was originally authored by [@903124S](https://x.com/903124S) in [this gist](https://gist.github.com/903124/cfbefa24da95e2316e0d5e8ef8ed360d).* 89 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Outlines 3 | template: home.html # Note that this is managed in overrides/home.html 4 | hide: 5 | - navigation 6 | - toc 7 | - feedback 8 | --- 9 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Installation 3 | --- 4 | 5 | # Installation 6 | 7 | You can install Outlines with `pip`: 8 | 9 | ```sh 10 | pip install outlines 11 | ``` 12 | 13 | Outlines supports OpenAI, Transformers, Mamba, llama.cpp, and ExLlamaV2, but **you will need to install them manually**: 14 | 15 | ```sh 16 | pip install openai 17 | pip install transformers datasets accelerate torch 18 | pip install llama-cpp-python 19 | pip install exllamav2 transformers torch 20 | pip install mamba_ssm transformers torch 21 | pip install vllm 22 | ``` 23 | 24 | If you encounter any problems using Outlines with these libraries, take a look at their installation instructions. The installation of `openai` and `transformers` should be straightforward, but other libraries have specific hardware requirements. 25 | 26 | ## Optional Dependencies 27 | 28 | Outlines provides multiple optional dependency sets to support different backends and use cases. You can install them as needed using: 29 | 30 | - `pip install "outlines[vllm]"` for [vLLM](https://github.com/vllm-project/vllm), optimized for high-throughput inference. 
31 | - `pip install "outlines[transformers]"` for [Hugging Face Transformers](https://huggingface.co/docs/transformers/index). 32 | - `pip install "outlines[mlx]"` for [MLX-LM](https://github.com/ml-explore/mlx-lm), optimized for Apple silicon. 33 | - `pip install "outlines[openai]"` to use OpenAI’s API. 34 | - `pip install "outlines[llamacpp]"` for [llama.cpp](https://github.com/ggerganov/llama.cpp), a lightweight LLM inference engine. 35 | - `pip install "outlines[exllamav2]"` for [ExLlamaV2](https://github.com/turboderp/exllamav2), optimized for NVIDIA GPUs. 36 | 37 | ## Bleeding Edge 38 | 39 | You can install the latest version of Outlines from the repository's `main` branch: 40 | 41 | ```sh 42 | pip install git+https://github.com/dottxt-ai/outlines.git@main 43 | ``` 44 | 45 | This can be useful, for instance, when a fix has been merged but not yet released. 46 | 47 | ## Installing for Development 48 | 49 | See the [contributing documentation](community/contribute.md) for instructions on how to install Outlines for development, including an example using the `dot-install` method for one of the backends. 50 | -------------------------------------------------------------------------------- /docs/licence.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Licence 3 | --- 4 | 5 | # Licence and citations 6 | 7 | Outlines is licenced under the Apache 2.0 licence. To comply with the licence you need to add the following notice at the top every file that uses part of Outlines' code: 8 | 9 | ``` 10 | Copyright 2023- The Outlines developers 11 | 12 | Licensed under the Apache License, Version 2.0 (the "License"); 13 | you may not use this file except in compliance with the License. 14 | You may obtain a copy of the License at 15 | 16 | http://www.apache.org/licenses/LICENSE-2.0 17 | 18 | Unless required by applicable law or agreed to in writing, software 19 | distributed under the License is distributed on an "AS IS" BASIS, 20 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21 | See the License for the specific language governing permissions and 22 | limitations under the License. 
23 | ``` 24 | 25 | If you use Outlines in your work you can use the following citation: 26 | 27 | ``` 28 | @article{willard2023efficient, 29 | title={Efficient Guided Generation for LLMs}, 30 | author={Willard, Brandon T and Louf, R{\'e}mi}, 31 | journal={arXiv preprint arXiv:2307.09702}, 32 | year={2023} 33 | } 34 | ``` 35 | -------------------------------------------------------------------------------- /docs/logos/amazon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/amazon.png -------------------------------------------------------------------------------- /docs/logos/apple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/apple.png -------------------------------------------------------------------------------- /docs/logos/best_buy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/best_buy.png -------------------------------------------------------------------------------- /docs/logos/canoe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/canoe.png -------------------------------------------------------------------------------- /docs/logos/cisco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/cisco.png -------------------------------------------------------------------------------- /docs/logos/dassault_systems.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/dassault_systems.png -------------------------------------------------------------------------------- /docs/logos/databricks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/databricks.png -------------------------------------------------------------------------------- /docs/logos/datadog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/datadog.png -------------------------------------------------------------------------------- /docs/logos/dbt_labs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/dbt_labs.png -------------------------------------------------------------------------------- /docs/logos/gladia.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/gladia.jpg -------------------------------------------------------------------------------- /docs/logos/harvard.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/harvard.png -------------------------------------------------------------------------------- /docs/logos/hf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/hf.png -------------------------------------------------------------------------------- /docs/logos/johns_hopkins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/johns_hopkins.png -------------------------------------------------------------------------------- /docs/logos/meta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/meta.png -------------------------------------------------------------------------------- /docs/logos/mit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/mit.png -------------------------------------------------------------------------------- /docs/logos/mount_sinai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/mount_sinai.png -------------------------------------------------------------------------------- /docs/logos/nvidia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/nvidia.png -------------------------------------------------------------------------------- /docs/logos/nyu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/nyu.png -------------------------------------------------------------------------------- /docs/logos/safran.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/safran.png -------------------------------------------------------------------------------- /docs/logos/salesforce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/salesforce.png -------------------------------------------------------------------------------- /docs/logos/shopify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/shopify.png -------------------------------------------------------------------------------- /docs/logos/smithsonian.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/smithsonian.png 
-------------------------------------------------------------------------------- /docs/logos/tinder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/tinder.png -------------------------------------------------------------------------------- /docs/logos/upenn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/docs/logos/upenn.png -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | -------------------------------------------------------------------------------- /docs/reference/chat_templating.md: -------------------------------------------------------------------------------- 1 | # Chat templating 2 | 3 | Instruction-tuned language models use "special tokens" to indicate different parts of text, such as the system prompt, the user prompt, any images, and the assistant's response. A [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating) is how different types of input are composited together into a single, machine-readable string. 4 | 5 | Outlines does not manage chat templating tokens when using instruct models. You must apply the chat template tokens to the prompt yourself -- if you do not apply chat templating on instruction-tuned models, you will often get nonsensical output from the model. 6 | 7 | Chat template tokens are not needed for base models. 8 | 9 | You can find the chat template tokens in the model's HuggingFace repo or documentation. As an example, the `SmolLM2-360M-Instruct` special tokens can be found [here](https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct/blob/main/special_tokens_map.json). 10 | 11 | However, it can be slow to manually look up a model's special tokens, and special tokens vary by models. If you change the model, your prompts may break if you have hard-coded special tokens. 
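To make the problem concrete, here is a minimal sketch of what manually hard-coding the ChatML-style special tokens used by `SmolLM2-360M-Instruct` looks like. The prompt string below is hand-written, which is exactly what breaks when you switch to a model with a different template:

```python
import outlines

model = outlines.models.transformers("HuggingFaceTB/SmolLM2-360M-Instruct")
generator = outlines.generate.text(model)

# Special tokens hard-coded for SmolLM2's ChatML-style chat template.
prompt = (
    "<|im_start|>system\n"
    "You extract information from text.<|im_end|>\n"
    "<|im_start|>user\n"
    "What food does the following text describe?<|im_end|>\n"
    "<|im_start|>assistant\n"
)

answer = generator(prompt)
```
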
 12 | 
 13 | If you need a convenient tool to apply chat templating for you, use the `tokenizer` from the `transformers` library instead:
 14 | 
 15 | ```python
 16 | from transformers import AutoTokenizer
 17 | 
 18 | tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
 19 | prompt = tokenizer.apply_chat_template(
 20 |     [
 21 |         {"role": "system", "content": "You extract information from text."},
 22 |         {"role": "user", "content": "What food does the following text describe?"},
 23 |     ],
 24 |     tokenize=False,
 25 |     add_bos=True,
 26 |     add_generation_prompt=True,
 27 | )
 28 | ```
 29 | 
 30 | yields
 31 | 
 32 | ```
 33 | <|im_start|>system
 34 | You extract information from text.<|im_end|>
 35 | <|im_start|>user
 36 | What food does the following text describe?<|im_end|>
 37 | <|im_start|>assistant
 38 | ```
 39 | 
-------------------------------------------------------------------------------- /docs/reference/generation/choices.md: --------------------------------------------------------------------------------
 1 | # Multiple choices
 2 | 
 3 | Outlines allows you to make sure the generated text is chosen from a set of different options:
 4 | 
 5 | ```python
 6 | from outlines import models, generate
 7 | 
 8 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 9 | generator = generate.choice(model, ["skirt", "dress", "pen", "jacket"])
 10 | answer = generator("Pick the odd word out: skirt, dress, pen, jacket")
 11 | 
 12 | ```
 13 | 
 14 | !!! Note "Performance"
 15 | 
 16 |     `generate.choice` computes an index that helps Outlines guide generation. This can take some time, but only needs to be done once. If you want to generate from the same list of choices several times make sure that you only call `generate.choice` once.
 17 | 
-------------------------------------------------------------------------------- /docs/reference/generation/custom_fsm_ops.md: --------------------------------------------------------------------------------
 1 | # Custom FSM Operations
 2 | 
 3 | Outlines is fast because it compiles regular expressions into an index ahead of inference. To do so we use the equivalence between regular expressions and Finite State Machines (FSMs), and the library [interegular](https://github.com/MegaIng/interegular) to perform the translation.
 4 | 
 5 | Alternatively, one can pass an FSM built using `interegular` directly to structure the generation.
 6 | 
 7 | ## Example
 8 | 
 9 | ### Using the `difference` operation
 10 | 
 11 | In the following example we build an FSM that accepts only the strings matched by the first regular expression but not by the second.
In particular, it will prevent the words "pink" and "elephant" from being generated:
 12 | 
 13 | ```python
 14 | import interegular
 15 | from outlines import models, generate
 16 | 
 17 | 
 18 | list_of_strings_pattern = """\["[^"\s]*"(?:,"[^"\s]*")*\]"""
 19 | pink_elephant_pattern = """.*(pink|elephant).*"""
 20 | 
 21 | list_of_strings_fsm = interegular.parse_pattern(list_of_strings_pattern).to_fsm()
 22 | pink_elephant_fsm = interegular.parse_pattern(pink_elephant_pattern).to_fsm()
 23 | 
 24 | difference_fsm = list_of_strings_fsm - pink_elephant_fsm
 25 | 
 26 | difference_fsm.accepts('["a","pink","elephant"]')
 27 | # False
 28 | difference_fsm.accepts('["a","blue","donkey"]')
 29 | # True
 30 | 
 31 | 
 32 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 33 | generator = generate.fsm(model, difference_fsm)
 34 | response = generator("Don't talk about pink elephants")
 35 | ```
 36 | 
 37 | To see the other operations available, consult [interegular's documentation](https://github.com/MegaIng/interegular/blob/master/interegular/fsm.py).
 38 | 
-------------------------------------------------------------------------------- /docs/reference/generation/format.md: --------------------------------------------------------------------------------
 1 | # Type constraints
 2 | 
 3 | We can ask completions to be restricted to valid Python types:
 4 | 
 5 | ```python
 6 | from outlines import models, generate
 7 | 
 8 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 9 | generator = generate.format(model, int)
 10 | answer = generator("When I was 6 my sister was half my age. Now I’m 70 how old is my sister?")
 11 | print(answer)
 12 | # 67
 13 | ```
 14 | 
 15 | The following types are currently available:
 16 | 
 17 | - int
 18 | - float
 19 | - bool
 20 | - datetime.date
 21 | - datetime.time
 22 | - datetime.datetime
 23 | - We also provide [custom types](types.md)
 24 | 
-------------------------------------------------------------------------------- /docs/reference/generation/regex.md: --------------------------------------------------------------------------------
 1 | # Regular expressions
 2 | 
 3 | Outlines can guarantee that the text generated by the LLM will match a regular expression:
 4 | 
 5 | ```python
 6 | from outlines import models, generate
 7 | 
 8 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 9 | 
 10 | generator = generate.regex(
 11 |     model,
 12 |     r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)",
 13 | )
 14 | 
 15 | prompt = "What is the IP address of the Google DNS servers? "
 16 | answer = generator(prompt, max_tokens=30)
 17 | 
 18 | print(answer)
 19 | # What is the IP address of the Google DNS servers?
 20 | # 2.2.6.1
 21 | ```
 22 | 
 23 | If you find yourself using `generate.regex` to restrict the answers' type you can take a look at [type-structured generation](types.md) instead.
 24 | 
 25 | !!! Note "Performance"
 26 | 
 27 |     `generate.regex` computes an index that helps Outlines guide generation. This can take some time, but only needs to be done once. If you want to generate several times using the same regular expression make sure that you only call `generate.regex` once.
 28 | 
-------------------------------------------------------------------------------- /docs/reference/generation/structured_generation_explanation.md: --------------------------------------------------------------------------------
 1 | # How does Outlines work?
 2 | 
 3 | 
 4 | Language models generate text token by token, using the previous token sequence as input and producing logits from which the next token is sampled.
This document explains the structured generation process, where only legal tokens are considered for the next step based on a predefined automata, e.g. a regex-defined [finite-state machine](https://en.wikipedia.org/wiki/Finite-state_machine) (FSM) or [Lark](https://lark-parser.readthedocs.io/en/stable/) grammar.` 5 | 6 | 7 | ## Worked Example 8 | 9 | Let's consider a worked example with a pattern for whole and decimal numbers: 10 | 11 | `^\d*(\.\d+)?$`. 12 | 13 | ### Creating Automata 14 | 15 | The pattern is first converted into an automata. Below is a brief explanation of the automata conversion and its representation. 16 | 17 | **Automata Diagram:** 18 | 19 | ```mermaid 20 | graph LR 21 | node0("1-9") --> node1("1-9") 22 | node1 --> node1 23 | node1 --> nodeEND{{END}} 24 | node1 --> nodePeriod(".") 25 | nodePeriod --> node2("1-9") 26 | node2 --> node2 27 | node2 --> nodeEND{{END}} 28 | ``` 29 | 30 | ### Generating a Token 31 | 32 | Let's assume that we're in the middle of generation, and so far "748" has been generated. Here is the automata with the current state highlighted in green, with the legal next characters being another number (1-9), a dot (.), or end of sequence. 33 | 34 | ```mermaid 35 | graph LR 36 | node0("1-9") --> node1("1-9") 37 | node1 --> node1 38 | node1 --> nodeEND{{END}} 39 | node1 --> nodePeriod(".") 40 | nodePeriod --> node2("1-9") 41 | node2 --> node2 42 | node2 --> nodeEND{{END}} 43 | 44 | style node1 fill:#090 45 | ``` 46 | 47 | Generating a token requires the following steps: 48 | 49 | - Feed the previous input sequence ("748") into the language model. 50 | - Language model runs a forward pass and produces token logits. 51 | - Outlines logits processor sets the probability of illegal tokens to 0%. 52 | - A token is sampled from the set of legal tokens. 53 | 54 | ![Generation and Logits Processing Flow Chart](../../assets/images/logits_processing_diagram.svg) 55 | -------------------------------------------------------------------------------- /docs/reference/generation/types.md: -------------------------------------------------------------------------------- 1 | # Custom types 2 | 3 | Outlines provides custom Pydantic types so you can focus on your use case rather than on writing regular expressions: 4 | 5 | | Category | Type | Import | Description | 6 | |:--------:|:----:|:-------|:------------| 7 | | ISBN | 10 & 13 | `outlines.types.ISBN` | There is no guarantee that the [check digit][wiki-isbn] will be correct | 8 | | Airport | IATA | `outlines.types.airports.IATA` | Valid [airport IATA codes][wiki-airport-iata] | 9 | | Country | alpha-2 code | `outlines.types.airports.Alpha2` | Valid [country alpha-2 codes][wiki-country-alpha-2] | 10 | | | alpha-3 code | `outlines.types.countries.Alpha3` | Valid [country alpha-3 codes][wiki-country-alpha-3] | 11 | | | numeric code | `outlines.types.countries.Numeric` | Valid [country numeric codes][wiki-country-numeric] | 12 | | | name | `outlines.types.countries.Name` | Valid country names | 13 | | | flag | `outlines.types.countries.Flag` | Valid flag emojis | 14 | | | email | `outlines.types.Email` | Valid email address | 15 | 16 | Some types require localization. We currently only support US types, but please don't hesitate to create localized versions of the different types and open a Pull Request. 
Localized types are specified using `types.locale` in the following way:
 17 | 
 18 | ```python
 19 | from outlines import types
 20 | 
 21 | types.locale("us").ZipCode
 22 | types.locale("us").PhoneNumber
 23 | ```
 24 | 
 25 | Here are the localized types that are currently available:
 26 | 
 27 | | Category | Locale | Import | Description |
 28 | |:--------:|:----:|:-------|:------------|
 29 | | Zip code | US | `ZipCode` | Generate US Zip(+4) codes |
 30 | | Phone number | US | `PhoneNumber` | Generate valid US phone numbers |
 31 | 
 32 | 
 33 | You can use these types in Pydantic schemas for JSON-structured generation:
 34 | 
 35 | ```python
 36 | from pydantic import BaseModel
 37 | 
 38 | from outlines import models, generate, types
 39 | 
 40 | # Specify the locale for types
 41 | locale = types.locale("us")
 42 | 
 43 | class Client(BaseModel):
 44 |     name: str
 45 |     phone_number: locale.PhoneNumber
 46 |     zip_code: locale.ZipCode
 47 | 
 48 | 
 49 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 50 | generator = generate.json(model, Client)
 51 | result = generator(
 52 |     "Create a client profile with the fields name, phone_number and zip_code"
 53 | )
 54 | print(result)
 55 | # name='Tommy' phone_number='129-896-5501' zip_code='50766'
 56 | ```
 57 | 
 58 | Or simply with `outlines.generate.format`:
 59 | 
 60 | ```python
 61 | from pydantic import BaseModel
 62 | 
 63 | from outlines import models, generate, types
 64 | 
 65 | 
 66 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
 67 | generator = generate.format(model, types.locale("us").PhoneNumber)
 68 | result = generator(
 69 |     "Return a US Phone number: "
 70 | )
 71 | print(result)
 72 | # 334-253-2630
 73 | ```
 74 | 
 75 | 
 76 | We plan on adding many more custom types. If you have found yourself writing regular expressions to generate fields of a given type, or if you could benefit from more specific types, don't hesitate to [submit a PR](https://github.com/dottxt-ai/outlines/pulls) or [open an issue](https://github.com/dottxt-ai/outlines/issues/new/choose).
 77 | 
 78 | 
 79 | [wiki-isbn]: https://en.wikipedia.org/wiki/ISBN#Check_digits
 80 | [wiki-airport-iata]: https://en.wikipedia.org/wiki/IATA_airport_code
 81 | [wiki-country-alpha-2]: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
 82 | [wiki-country-alpha-3]: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3
 83 | [wiki-country-numeric]: https://en.wikipedia.org/wiki/ISO_3166-1_numeric
 84 | 
-------------------------------------------------------------------------------- /docs/reference/index.md: --------------------------------------------------------------------------------
 1 | # Reference
 2 | 
 3 | ## Structured generation
 4 | 
 5 | While LLM capabilities are increasingly impressive, we can make their output more reliable by steering the generation. Outlines thus offers mechanisms to specify high-level constraints on text completions by generative language models.
 6 | 
 7 | ### Stopping sequence
 8 | By default, language models stop generating tokens after an end-of-sequence (EOS) token has been generated, or after a set maximum number of tokens. Their output can be verbose, and for practical purposes it is often necessary to stop the generation after a given sequence has been found instead.
You can use the stop_at keyword argument when calling the model with a prompt: 9 | 10 | ```python 11 | import outlines.models as models 12 | 13 | complete = models.openai("gpt-4o-mini") 14 | expert = complete("Name an expert in quantum gravity.", stop_at=["\n", "."]) 15 | ``` 16 | -------------------------------------------------------------------------------- /docs/reference/models/exllamav2.md: -------------------------------------------------------------------------------- 1 | # ExllamaV2 2 | 3 | The `outlines.models.exllamav2` model requires a Logits Processor component for compatibility with Outlines structured generation. While ExLlamaV2 doesn't natively support this feature, a third-party fork provides the necessary functionality. You can install it with the following command: 4 | 5 | ```bash 6 | pip install git+https://github.com/lapp0/exllamav2@sampler-logits-processor 7 | ``` 8 | 9 | Install other requirements: 10 | 11 | ```bash 12 | pip install transformers torch 13 | ``` 14 | 15 | *Coming soon* 16 | -------------------------------------------------------------------------------- /docs/reference/models/mlxlm.md: -------------------------------------------------------------------------------- 1 | # mlx-lm 2 | 3 | Outlines provides an integration with [mlx-lm](https://github.com/ml-explore/mlx-examples/tree/main/llms), allowing models to be run quickly on Apple Silicon via the [mlx](https://ml-explore.github.io/mlx/build/html/index.html) library. 4 | 5 | !!! Note "Installation" 6 | 7 | You need to install the `mlx` and `mlx-lm` libraries on a device which [supports Metal](https://support.apple.com/en-us/102894) to use the mlx-lm integration. To get started quickly you can also run: 8 | 9 | ```bash 10 | pip install "outlines[mlxlm]" 11 | ``` 12 | 13 | 14 | ## Load the model 15 | 16 | You can initialize the model by passing the name of the repository on the HuggingFace Hub. The official repository for mlx-lm supported models is [mlx-community](https://huggingface.co/mlx-community). 17 | 18 | ```python 19 | from outlines import models 20 | 21 | model = models.mlxlm("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit") 22 | ``` 23 | 24 | This will download the model files to the hub cache folder and load the weights in memory. 25 | 26 | The arguments `model_config` and `tokenizer_config` are available to modify loading behavior. For example, per the `mlx-lm` [documentation](https://github.com/ml-explore/mlx-examples/tree/main/llms#supported-models), you must set an eos_token for `qwen/Qwen-7B`. In outlines you may do so via 27 | 28 | ``` 29 | model = models.mlxlm( 30 | "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit", 31 | tokenizer_config={"eos_token": "<|endoftext|>", "trust_remote_code": True}, 32 | ) 33 | ``` 34 | 35 | **Main parameters:** 36 | 37 | (Subject to change. Table based on [mlx-lm.load docstring](https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/utils.py#L429)) 38 | 39 | | Parameters | Type | Description | Default | 40 | |--------------------|--------|--------------------------------------------------------------------------------------------------|---------| 41 | | `tokenizer_config` | `dict` | Configuration parameters specifically for the tokenizer. Defaults to an empty dictionary. | `{}` | 42 | | `model_config` | `dict` | Configuration parameters specifically for the model. Defaults to an empty dictionary. | `{}` | 43 | | `adapter_path` | `str` | Path to the LoRA adapters. If provided, applies LoRA layers to the model. 
| `None` |
 44 | | `lazy` | `bool` | If False, evaluate the model parameters to make sure they are loaded in memory before returning. | `False` |
 45 | 
 46 | 
 47 | ## Generate text
 48 | 
 49 | You may generate text using the parameters described in the [text generation documentation](../text.md).
 50 | 
 51 | With the loaded model, you can generate text or perform structured generation, e.g.:
 52 | 
 53 | ```python
 54 | from outlines import models, generate
 55 | 
 56 | model = models.mlxlm("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit")
 57 | generator = generate.text(model)
 58 | 
 59 | answer = generator("A prompt", temperature=2.0)
 60 | ```
 61 | 
 62 | ## Streaming
 63 | 
 64 | You may create a streaming iterable with minimal changes:
 65 | 
 66 | ```python
 67 | from outlines import models, generate
 68 | 
 69 | model = models.mlxlm("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit")
 70 | generator = generate.text(model)
 71 | 
 72 | for token_str in generator.stream("A prompt", temperature=2.0):
 73 |     print(token_str)
 74 | ```
 75 | 
 76 | ## Structured
 77 | 
 78 | You may perform structured generation with mlxlm to guarantee your output will match a regex pattern, JSON schema, or Lark grammar.
 79 | 
 80 | Example: Phone number generation with pattern `"\\+?[1-9][0-9]{7,14}"`:
 81 | 
 82 | ```python
 83 | from outlines import models, generate
 84 | 
 85 | model = models.mlxlm("mlx-community/Meta-Llama-3.1-8B-Instruct-8bit")
 86 | 
 87 | phone_number_pattern = "\\+?[1-9][0-9]{7,14}"
 88 | generator = generate.regex(model, phone_number_pattern)
 89 | 
 90 | model_output = generator("What's Jenny's Number?\n")
 91 | print(model_output)
 92 | # '8675309'
 93 | ```
 94 | 
-------------------------------------------------------------------------------- /docs/reference/models/models.md: --------------------------------------------------------------------------------
 1 | ---
 2 | title: Models
 3 | ---
 4 | 
 5 | # Models
 6 | 
 7 | Outlines supports generation using a number of inference engines (`outlines.models`).
Loading a model using outlines follows a similar interface between inference engines: 8 | 9 | ```python 10 | import outlines 11 | 12 | model = outlines.models.transformers("microsoft/Phi-3-mini-128k-instruct") 13 | model = outlines.models.transformers_vision("llava-hf/llava-v1.6-mistral-7b-hf") 14 | model = outlines.models.vllm("microsoft/Phi-3-mini-128k-instruct") 15 | model = outlines.models.llamacpp( 16 | "microsoft/Phi-3-mini-4k-instruct-gguf", "Phi-3-mini-4k-instruct-q4.gguf" 17 | ) 18 | model = outlines.models.exllamav2("bartowski/Phi-3-mini-128k-instruct-exl2") 19 | model = outlines.models.mlxlm("mlx-community/Phi-3-mini-4k-instruct-4bit") 20 | 21 | model = outlines.models.openai( 22 | "gpt-4o-mini", 23 | api_key=os.environ["OPENAI_API_KEY"] 24 | ) 25 | ``` 26 | 27 | 28 | # Feature Matrix 29 | | | [Transformers](transformers.md) | [Transformers Vision](transformers_vision.md) | [vLLM](vllm.md) | [llama.cpp](llamacpp.md) | [ExLlamaV2](exllamav2.md) | [MLXLM](mlxlm.md) | [OpenAI](openai.md)* | 30 | |-------------------|--------------|---------------------|------|-----------|-----------|-------|---------| 31 | | **Device** | | | | | | | | 32 | | Cuda | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | N/A | 33 | | Apple Silicon | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | N/A | 34 | | x86 / AMD64 | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | N/A | 35 | | **Sampling** | | | | | | | | 36 | | Greedy | ✅ | ✅ | ✅ | ✅* | ✅ | ✅ | ❌ | 37 | | Multinomial | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 38 | | Multiple Samples | ✅ | ✅ | | ❌ | | ❌ | ✅ | 39 | | Beam Search | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | 40 | | **Generation** | | | | | | | | 41 | | Batch | ✅ | ✅ | ✅ | ❌ | ? | ❌ | ❌ | 42 | | Stream | ✅ | ❌ | ❌ | ✅ | ? | ✅ | ❌ | 43 | | Text | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 44 | | **Structured** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 45 | | JSON Schema | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 46 | | Choice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 47 | | Regex | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | 48 | | Grammar | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | 49 | 50 | 51 | ## Caveats 52 | 53 | - OpenAI doesn't support structured generation due to limitations in their API and server implementation. 54 | - `outlines.generate` ["Structured"](../generation/generation.md) includes methods such as `outlines.generate.regex`, `outlines.generate.json`, `outlines.generate.cfg`, etc. 55 | - MLXLM only supports Apple Silicon. 56 | - llama.cpp greedy sampling available via multinomial with `temperature = 0.0`. 57 | -------------------------------------------------------------------------------- /docs/reference/models/tgi.md: -------------------------------------------------------------------------------- 1 | # Text-generation-inference (TGI) 2 | 3 | TGI uses Outlines to provide structured generation, see [their documentation](https://huggingface.co/docs/text-generation-inference/en/basic_tutorials/using_guidance). 4 | -------------------------------------------------------------------------------- /docs/reference/models/transformers_vision.md: -------------------------------------------------------------------------------- 1 | # Transformers Vision 2 | 3 | Outlines allows seamless use of [vision models](https://huggingface.co/learn/computer-vision-course/en/unit4/multimodal-models/tasks-models-part1). 4 | 5 | `outlines.models.transformers_vision` shares interfaces with, and is based on [outlines.models.transformers](./transformers.md). 
 6 | 
 7 | Tasks supported include
 8 | 
 9 | - image + text -> text
 10 | - video + text -> text
 11 | 
 12 | 
 13 | 
 14 | ## Example: Using [Llava-Next](https://huggingface.co/docs/transformers/en/model_doc/llava_next) Vision Models
 15 | 
 16 | Install dependencies:
 17 | `pip install torchvision pillow flash-attn`
 18 | 
 19 | Create the model:
 20 | ```python
 21 | import outlines
 22 | from transformers import LlavaNextForConditionalGeneration
 23 | 
 24 | model = outlines.models.transformers_vision(
 25 |     "llava-hf/llava-v1.6-mistral-7b-hf",
 26 |     model_class=LlavaNextForConditionalGeneration,
 27 |     device="cuda",
 28 | )
 29 | ```
 30 | 
 31 | Create a convenience function to load a `PIL.Image` from a URL:
 32 | ```python
 33 | from PIL import Image
 34 | from io import BytesIO
 35 | from urllib.request import urlopen
 36 | 
 37 | def img_from_url(url):
 38 |     img_byte_stream = BytesIO(urlopen(url).read())
 39 |     return Image.open(img_byte_stream).convert("RGB")
 40 | ```
 41 | 
 42 | ### Describing an image
 43 | 
 44 | ```python
 45 | description_generator = outlines.generate.text(model)
 46 | description_generator(
 47 |     "<image> detailed description:",
 48 |     [img_from_url("https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg")]
 49 | )
 50 | ```
 51 | 
 52 | > This is a color photograph featuring a Siamese cat with striking blue eyes. The cat has a creamy coat and a light eye color, which is typical for the Siamese breed. Its features include elongated ears, a long, thin tail, and a striking coat pattern. The cat is sitting in an indoor setting, possibly on a cat tower or a similar raised platform, which is covered with a beige fabric, providing a comfortable and soft surface for the cat to rest or perch. The surface of the wall behind the cat appears to be a light-colored stucco or plaster.
 53 | 
 54 | #### Multiple Images
 55 | 
 56 | To include multiple images in your prompt you simply add more `<image>` tokens to the prompt:
 57 | 
 58 | ```python
 59 | image_urls = [
 60 |     "https://cdn1.byjus.com/wp-content/uploads/2020/08/ShapeArtboard-1-copy-3.png",  # triangle
 61 |     "https://cdn1.byjus.com/wp-content/uploads/2020/08/ShapeArtboard-1-copy-11.png",  # hexagon
 62 | ]
 63 | description_generator = outlines.generate.text(model)
 64 | description_generator(
 65 |     "<image><image>What shapes are present?",
 66 |     list(map(img_from_url, image_urls)),
 67 | )
 68 | ```
 69 | 
 70 | > There are two shapes present. One shape is a hexagon and the other shape is an triangle.
 71 | 
 72 | 
 73 | ### Classifying an Image
 74 | 
 75 | ```python
 76 | pattern = "Mercury|Venus|Earth|Mars|Saturn|Jupiter|Neptune|Uranus|Pluto"
 77 | planet_generator = outlines.generate.regex(model, pattern)
 78 | 
 79 | planet_generator(
 80 |     "What planet is this: <image>",
 81 |     [img_from_url("https://upload.wikimedia.org/wikipedia/commons/e/e3/Saturn_from_Cassini_Orbiter_%282004-10-06%29.jpg")]
 82 | )
 83 | ```
 84 | 
 85 | > Saturn
 86 | 
 87 | 
 88 | ### Extracting Structured Image data
 89 | 
 90 | ```python
 91 | from pydantic import BaseModel
 92 | from typing import List, Optional
 93 | 
 94 | class ImageData(BaseModel):
 95 |     caption: str
 96 |     tags_list: List[str]
 97 |     object_list: List[str]
 98 |     is_photo: bool
 99 | 
 100 | image_data_generator = outlines.generate.json(model, ImageData)
 101 | 
 102 | image_data_generator(
 103 |     "<image> detailed JSON metadata:",
 104 |     [img_from_url("https://upload.wikimedia.org/wikipedia/commons/9/98/Aldrin_Apollo_11_original.jpg")]
 105 | )
 106 | ```
 107 | 
 108 | > `ImageData(caption='An astronaut on the moon', tags_list=['moon', 'space', 'nasa', 'americanflag'], object_list=['moon', 'moon_surface', 'space_suit', 'americanflag'], is_photo=True)`
 109 | 
 110 | 
 111 | ## Resources
 112 | 
 113 | ### Choosing a model
 114 | - https://mmbench.opencompass.org.cn/leaderboard
 115 | - https://huggingface.co/spaces/WildVision/vision-arena
 116 | 
-------------------------------------------------------------------------------- /docs/reference/serve/lmstudio.md: --------------------------------------------------------------------------------
 1 | # Serve with LM Studio
 2 | 
 3 | !!! tip "Would rather not self-host?"
 4 | 
 5 |     If you want to get started quickly with JSON-structured generation you can call instead [.json](https://h1xbpbfsf0w.typeform.com/to/ZgBCvJHF), a [.txt](http://dottxt.co) API that guarantees valid JSON.
 6 | 
 7 | [LM Studio](https://lmstudio.ai/) is an application that runs local LLMs. It flexibly mixes GPU and CPU compute in hardware-constrained environments.
 8 | 
 9 | As of [LM Studio 0.3.4](https://lmstudio.ai/blog/lmstudio-v0.3.4), it natively supports Outlines for structured text generation, using an OpenAI-compatible endpoint.
 10 | 
 11 | ## Setup
 12 | 
 13 | 1. Install LM Studio by visiting their [downloads page](https://lmstudio.ai/download).
 14 | 2. Enable the LM Studio [server functionality](https://lmstudio.ai/docs/basics/server).
 15 | 3. Download [a model](https://lmstudio.ai/docs/basics#1-download-an-llm-to-your-computer).
 16 | 4. Install Python dependencies.
 17 | ```bash
 18 | pip install pydantic openai
 19 | ```
 20 | 
 21 | ## Calling the server
 22 | 
 23 | By default, LM Studio will serve from `http://localhost:1234`. If you are serving on a different port or host, make sure to change the `base_url` argument in `OpenAI` to the relevant location.
 24 | 
 25 | ```python
 26 | class Testing(BaseModel):
 27 |     """
 28 |     A class representing a testing schema.
 29 |     """
 30 |     name: str
 31 |     age: int
 32 | 
 33 | openai_client = openai.OpenAI(
 34 |     base_url="http://0.0.0.0:1234/v1",
 35 |     api_key="dopeness"
 36 | )
 37 | 
 38 | # Make a request to the local LM Studio server
 39 | response = openai_client.beta.chat.completions.parse(
 40 |     model="hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF",
 41 |     messages=[
 42 |         {"role": "system", "content": "You are like so good at whatever you do."},
 43 |         {"role": "user", "content": "My name is Cameron and I am 28 years old.
What's my name and age?"} 44 | ], 45 | response_format=Testing 46 | ) 47 | ``` 48 | 49 | You should receive a `ParsedChatCompletion[Testing]` object back: 50 | 51 | ```python 52 | ParsedChatCompletion[Testing]( 53 | id='chatcmpl-3hykyf0fxus7jc90k6gwlw', 54 | choices=[ 55 | ParsedChoice[Testing]( 56 | finish_reason='stop', 57 | index=0, 58 | logprobs=None, 59 | message=ParsedChatCompletionMessage[Testing]( 60 | content='{ "age": 28, "name": "Cameron" }', 61 | refusal=None, 62 | role='assistant', 63 | function_call=None, 64 | tool_calls=[], 65 | parsed=Testing(name='Cameron', age=28) 66 | ) 67 | ) 68 | ], 69 | created=1728595622, 70 | model='lmstudio-community/Phi-3.1-mini-128k-instruct-GGUF/Phi-3.1-mini-128k-instruct-Q4_K_M.gguf', 71 | object='chat.completion', 72 | service_tier=None, 73 | system_fingerprint='lmstudio-community/Phi-3.1-mini-128k-instruct-GGUF/Phi-3.1-mini-128k-instruct- 74 | Q4_K_M.gguf', 75 | usage=CompletionUsage( 76 | completion_tokens=17, 77 | prompt_tokens=47, 78 | total_tokens=64, 79 | completion_tokens_details=None, 80 | prompt_tokens_details=None 81 | ) 82 | ) 83 | ``` 84 | 85 | You can retrieve your `Testing` object with 86 | 87 | ```python 88 | response.choices[0].message.parsed 89 | ``` 90 | -------------------------------------------------------------------------------- /docs/reference/serve/vllm.md: -------------------------------------------------------------------------------- 1 | # Serve with vLLM 2 | 3 | !!! tip "Would rather not self-host?" 4 | 5 | If you want to get started quickly with JSON-structured generation you can call instead [.json](https://h1xbpbfsf0w.typeform.com/to/ZgBCvJHF), a [.txt](http://dottxt.co) API that guarantees valid JSON. 6 | 7 | Outlines can be deployed as an LLM service using the vLLM inference engine and a FastAPI server. vLLM is not installed by default so will need to install Outlines with: 8 | 9 | ```bash 10 | pip install outlines[serve] 11 | ``` 12 | 13 | You can then start the server with: 14 | 15 | ```bash 16 | python -m outlines.serve.serve --model="microsoft/Phi-3-mini-4k-instruct" 17 | ``` 18 | 19 | This will by default start a server at `http://127.0.0.1:8000` (check what the console says, though). Without the `--model` argument set, the OPT-125M model is used. The `--model` argument allows you to specify any model of your choosing. 20 | 21 | To run inference on multiple GPUs you must pass the `--tensor-parallel-size` argument when initializing the server. For instance, to run inference on 2 GPUs: 22 | 23 | 24 | ```bash 25 | python -m outlines.serve.serve --model="microsoft/Phi-3-mini-4k-instruct" --tensor-parallel-size 2 26 | ``` 27 | 28 | 29 | ### Alternative Method: Via Docker 30 | 31 | You can install and run the server with Outlines' official Docker image using the command 32 | 33 | ```bash 34 | docker run -p 8000:8000 outlinesdev/outlines --model="microsoft/Phi-3-mini-4k-instruct" 35 | ``` 36 | 37 | ## Querying Endpoint 38 | 39 | You can then query the model in shell by passing a prompt and either 40 | 41 | 1. a [JSON Schema][jsonschema]{:target="_blank"} specification or 42 | 2. a [Regex][regex]{:target="_blank"} pattern 43 | 44 | with the `schema` or `regex` parameters, respectively, to the `/generate` endpoint. If both are specified, the schema will be used. If neither is specified, the generated text will be unconstrained. 
45 | 46 | For example, to generate a string that matches the schema `{"type": "string"}` (any string): 47 | 48 | ```bash 49 | curl http://127.0.0.1:8000/generate \ 50 | -d '{ 51 | "prompt": "What is the capital of France?", 52 | "schema": {"type": "string", "maxLength": 5} 53 | }' 54 | ``` 55 | 56 | To generate a string that matches the regex `(-)?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-][0-9]+)?` (a number): 57 | 58 | ```bash 59 | curl http://127.0.0.1:8000/generate \ 60 | -d '{ 61 | "prompt": "What is Pi? Give me the first 15 digits: ", 62 | "regex": "(-)?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][+-][0-9]+)?" 63 | }' 64 | ``` 65 | 66 | Instead of `curl`, you can also use the [requests][requests]{:target="_blank"} library from another python program. 67 | 68 | Please consult the [vLLM documentation][vllm]{:target="_blank"} for details on additional request parameters. You can also [read the code](https://github.com/dottxt-ai/outlines/blob/main/outlines/serve/serve.py) in case you need to customize the solution to your needs. 69 | 70 | [requests]: https://requests.readthedocs.io/en/latest/ 71 | [vllm]: https://docs.vllm.ai/en/latest/index.html 72 | [jsonschema]: https://json-schema.org/learn/getting-started-step-by-step 73 | [regex]: https://www.regular-expressions.info/tutorial.html 74 | -------------------------------------------------------------------------------- /docs/reference/text.md: -------------------------------------------------------------------------------- 1 | # Text generation 2 | 3 | Outlines provides a unified interface to generate text with many language models, API-based and local. The same pattern is used throughout the library: 4 | 5 | 1. Instantiate a generator by calling `outlines.generate.text` with the model to be used. 6 | 2. Call the generator with the prompt and (optionally) some generation parameters. 7 | 8 | 9 | ```python 10 | from outlines import models, generate 11 | 12 | model = models.openai("gpt-4o-mini") 13 | generator = generate.text(model) 14 | answer = generator("What is 2+2?") 15 | 16 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct") 17 | generator = generate.text(model) 18 | answer = generator("What is 2+2?") 19 | ``` 20 | 21 | By default Outlines uses the multinomial sampler with `temperature=1`. See [this section](samplers.md) to learn how to use different samplers. 22 | 23 | ## Streaming 24 | 25 | Outlines allows you to stream the model's response by calling the `.stream` method of the generator with the prompt: 26 | 27 | 28 | ```python 29 | from outlines import models, generate 30 | 31 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct") 32 | generator = generate.text(model) 33 | 34 | tokens = generator.stream("What is 2+2?") 35 | for token in tokens: 36 | print(token) 37 | ``` 38 | 39 | ## Parameters 40 | 41 | ### Limit the number of tokens generated 42 | 43 | To limit the number of tokens generated you can pass the `max_tokens` positional argument to the generator: 44 | 45 | ```python 46 | from outlines import models, generate 47 | 48 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct") 49 | generator = generate.text(model) 50 | 51 | answer = generator("What is 2+2?", 5) 52 | answer = generator("What is 2+2?", max_tokens=5) 53 | ``` 54 | 55 | ### Stop after a given string is generated 56 | 57 | You can also ask the model to stop generating text after a given string has been generated, for instance a period or a line break. 
You can pass a string or a list of strings for the `stop_at` argument:
58 |
59 |
60 | ```python
61 | from outlines import models, generate
62 |
63 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
64 | generator = generate.text(model)
65 |
66 | answer = generator("What is 2+2?", stop_at=".")
67 | answer = generator("What is 2+2?", stop_at=[".", "\n"])
68 | ```
69 |
70 | *The stopping string will be included in the response.*
71 |
72 |
73 | ### Seed the generation
74 |
75 | It can be useful to seed the generation in order to get reproducible results:
76 |
77 | ```python
78 | import torch
79 | from outlines import models, generate
80 |
81 | model = models.transformers("microsoft/Phi-3-mini-4k-instruct")
82 | generator = generate.text(model)
83 | seed = 789001
84 |
85 | answer = generator("What is 2+2?", seed=seed)
86 | ```
87 | -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: --------------------------------------------------------------------------------
1 | @font-face {
2 | font-family: 'Source Code Pro Custom', monospace;
3 | src: url(https://fonts.googleapis.com/css2?family=Source+Code+Pro:ital,wght@0,200..900;1,200..900&display=swap);
4 | }
5 |
6 | :root > * {
7 | --md-default-bg-color: #FFFFFF;
8 | --md-code-bg-color: #2E3440;
9 | --md-code-fg-color: #FFFFFF;
10 | --md-text-font-family: "Inter";
11 | --md-code-font: "Source Code Pro Custom";
12 | --md-typeset-a-color: #d16626; /*this is the brand color*/
13 |
14 | /* don't inherit white fg color for mermaid diagrams from --md-code-fg-color */
15 | --md-mermaid-label-fg-color: #000000;
16 | --md-mermaid-edge-color: #000000;
17 | }
18 |
19 | .index-pre-code {
20 | max-width: 700px;
21 | left: 50%;
22 | }
23 |
24 | .index-pre-code pre>code {
25 | text-align: left;
26 | }
27 |
28 | .md-clipboard::after {
29 | color: #FFFFFF;
30 | transition: color 0.3s ease-in-out;
31 | }
32 |
33 | .md-clipboard:hover::after {
34 | color: #D8DEE9;
35 | }
36 |
37 | .md-source-file {
38 | text-align: center;
39 | padding: 24px 0;
40 | }
41 |
42 | .md-typeset pre>code {
43 | border-radius: .2rem;
44 | box-shadow: 10px 5px 5px #D8DEE9;
45 | }
46 |
47 | .md-typeset p > code {
48 | background: #ECEFF4;
49 | color: #000000;
50 | font-weight: 500;
51 | }
52 |
53 | .md-typeset strong > code {
54 | background: #ECEFF4;
55 | color: #000000;
56 | font-weight: 500;
57 | }
58 |
59 | .md-content p > code {
60 | background: #ECEFF4;
61 | color: #000000;
62 | font-weight: 500;
63 | }
64 |
65 | .md-typeset td > code {
66 | background: #ECEFF4;
67 | color: #000000;
68 | font-weight: 500;
69 | }
70 |
71 | .md-typeset li > code {
72 | background: #ECEFF4;
73 | color: #000000;
74 | font-weight: 500;
75 | }
76 |
77 | .md-typeset code {
78 | font-weight: 500;
79 | }
80 |
81 | .md-typeset pre {
82 | margin-left: .5rem;
83 | margin-right: .5rem;
84 | margin-top: 2rem;
85 | margin-bottom: 2rem;
86 | }
87 |
88 | .language-python {
89 | background: #FFFFFF ! important
90 | }
91 |
92 | .language-bash {
93 | background: #FFFFFF ! important
94 | }
95 |
96 | .language-toml {
97 | background: #FFFFFF ! important
98 | }
99 |
100 | .language-text {
101 | background: #FFFFFF ! important
102 | }
103 |
104 | .language-json {
105 | background: #FFFFFF !
important 106 | } 107 | 108 | h1.title { 109 | color: #FFFFFF; 110 | margin: 0px 0px 5px; 111 | } 112 | 113 | h2.subtitle { 114 | margin: 5px 0px 25px; 115 | font-size: 1rem; 116 | max-width: 540px; 117 | margin: 0 auto; 118 | } 119 | 120 | .md-typeset { 121 | line-height: 24px; 122 | font-weight: 400; 123 | } 124 | 125 | .md-typeset h1 { 126 | font-weight: bold; 127 | color: #000000; 128 | } 129 | 130 | .md-typeset h2 { 131 | font-weight: bold; 132 | color: #000000; 133 | } 134 | 135 | span.md-ellipsis { 136 | color: black; 137 | } 138 | 139 | .md-nav__link--active { 140 | background-color: #ECEFF4; 141 | } 142 | 143 | .md-typeset ol li { 144 | margin-bottom: .1rem; 145 | } 146 | 147 | .md-typeset ul li { 148 | margin-bottom: .1rem; 149 | } 150 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | # To use: 2 | # 3 | # $ conda env create -f environment.yml # `mamba` works too for this command 4 | # $ conda activate dottxt-ai 5 | # 6 | name: dottxt-ai 7 | channels: 8 | - conda-forge 9 | - huggingface 10 | dependencies: 11 | - python==3.10.0 12 | - jinja2 13 | - numpy 14 | - pydantic 15 | - scipy 16 | - pytest 17 | - pre-commit 18 | - referencing 19 | - jsonschema 20 | - transformers 21 | - pip 22 | - pip: 23 | - -e ".[test]" 24 | -------------------------------------------------------------------------------- /examples/babyagi.py: -------------------------------------------------------------------------------- 1 | """This example is a simplified translation of BabyAGI. 2 | 3 | It currently does not use the vector store retrieval 4 | 5 | The original repo can be found at https://github.com/yoheinakajima/babyagi 6 | """ 7 | 8 | from collections import deque 9 | from typing import Deque, List 10 | 11 | import outlines 12 | import outlines.models as models 13 | from outlines import Template 14 | 15 | 16 | model = models.openai("gpt-4o-mini") 17 | complete = outlines.generate.text(model) 18 | 19 | ## Load the prompts 20 | perform_task_ppt = Template.from_file("prompts/babyagi_perform_task.txt") 21 | create_tasks_ppt = Template.from_file("prompts/babyagi_create_task.txt") 22 | prioritize_tasks_ppt = Template.from_file("prompts/babyagi_prioritize_task.txt") 23 | 24 | 25 | def create_tasks_fmt(result: str) -> List[str]: 26 | new_tasks = result.split("\n") 27 | 28 | task_list = [] 29 | for task in new_tasks: 30 | parts = task.strip().split(".", 1) 31 | if len(parts) == 2: 32 | task_list.append(parts[1].strip()) 33 | 34 | return task_list 35 | 36 | 37 | def prioritize_tasks_fmt(result: str): 38 | new_tasks = result.split("\n") 39 | 40 | task_list: Deque = deque([]) 41 | for task in new_tasks: 42 | parts = task.strip().split(".", 1) 43 | if len(parts) == 2: 44 | task_id = int(parts[0].strip()) 45 | task_name = parts[1].strip() 46 | task_list.append({"task_id": task_id, "task_name": task_name}) 47 | 48 | return task_list 49 | 50 | 51 | objective = "Becoming rich while doing nothing." 52 | first_task = { 53 | "task_id": 1, 54 | "task_name": "Find a repeatable, low-maintainance, scalable business.", 55 | } 56 | next_task_id = 1 57 | task_list = deque([first_task]) 58 | 59 | 60 | def one_cycle(objective: str, task_list, next_task_id: int): 61 | """One BabyAGI cycle. 62 | 63 | It consists in executing the highest-priority task, creating some new tasks 64 | given the result, and re-priotizing the tasks. 
65 | 66 | Parameters 67 | ---------- 68 | objective 69 | The overall objective of the session. 70 | task_list 71 | The current list of tasks to perform. 72 | task_id_counter 73 | The current task id. 74 | 75 | """ 76 | 77 | task = task_list.popleft() 78 | 79 | prompt = perform_task_ppt(objective, task) 80 | result = complete(prompt) 81 | 82 | prompt = create_tasks_ppt( 83 | objective, first_task["task_name"], result, [first_task["task_name"]] 84 | ) 85 | new_tasks = complete(prompt) 86 | 87 | new_tasks = create_tasks_fmt(new_tasks) 88 | 89 | for task in new_tasks: 90 | next_task_id += 1 91 | task_list.append({"task_id": next_task_id, "task_name": task}) 92 | 93 | prompt = prioritize_tasks_ppt( 94 | objective, [task["task_name"] for task in task_list], next_task_id 95 | ) 96 | prioritized_tasks = complete(prompt) 97 | 98 | prioritized_tasks = prioritize_tasks_fmt(prioritized_tasks) 99 | 100 | return task, result, prioritized_tasks, next_task_id 101 | 102 | 103 | # Let's run it for 5 cycles to see how it works without spending a fortune. 104 | for _ in range(5): 105 | print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m") 106 | for t in task_list: 107 | print(" • " + str(t["task_name"])) 108 | 109 | task, result, task_list, next_task_id = one_cycle( 110 | objective, task_list, next_task_id 111 | ) 112 | 113 | print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m") 114 | print(task) 115 | print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m") 116 | print(result) 117 | -------------------------------------------------------------------------------- /examples/beam-cloud/README.md: -------------------------------------------------------------------------------- 1 | ## Deploy Outlines on Beam 2 | 3 | 1. Create an account [here](https://beam.cloud) and install the Beam SDK 4 | 2. Download the `app.py` file to your computer 5 | 3. Deploy it as a serverless API by running: `beam deploy app.py:predict` 6 | -------------------------------------------------------------------------------- /examples/beam-cloud/app.py: -------------------------------------------------------------------------------- 1 | from beam import Image, endpoint, env 2 | 3 | if env.is_remote(): 4 | import outlines 5 | 6 | 7 | # Pre-load models when the container first starts 8 | def load_models(): 9 | import outlines 10 | 11 | model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct") 12 | return model 13 | 14 | 15 | @endpoint( 16 | name="outlines-serverless", 17 | gpu="A10G", 18 | cpu=1, 19 | memory="16Gi", 20 | on_start=load_models, 21 | image=Image().add_python_packages( 22 | ["outlines", "torch", "transformers", "accelerate"] 23 | ), 24 | ) 25 | def predict(context, **inputs): 26 | default_prompt = """You are a sentiment-labelling assistant. 27 | Is the following review positive or negative? 28 | 29 | Review: This restaurant is just awesome! 
30 | """ 31 | 32 | prompt = inputs.get("prompt", default_prompt) 33 | 34 | # Unpack cached model from context 35 | model = context.on_start_value 36 | # Inference 37 | generator = outlines.generate.choice(model, ["Positive", "Negative"]) 38 | answer = generator(prompt) 39 | return {"answer": answer} 40 | -------------------------------------------------------------------------------- /examples/bentoml/.bentoignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | .ipynb_checkpoints 5 | venv/ 6 | -------------------------------------------------------------------------------- /examples/bentoml/bentofile.yaml: -------------------------------------------------------------------------------- 1 | service: "service:Outlines" 2 | labels: 3 | owner: bentoml-team 4 | stage: demo 5 | include: 6 | - "*.py" 7 | python: 8 | requirements_txt: "./requirements.txt" 9 | lock_packages: false 10 | -------------------------------------------------------------------------------- /examples/bentoml/import_model.py: -------------------------------------------------------------------------------- 1 | import bentoml 2 | 3 | MODEL_ID = "mistralai/Mistral-7B-v0.1" 4 | BENTO_MODEL_TAG = MODEL_ID.lower().replace("/", "--") 5 | 6 | 7 | def import_model(model_id, bento_model_tag): 8 | import torch 9 | from transformers import AutoModelForCausalLM, AutoTokenizer 10 | 11 | tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) 12 | model = AutoModelForCausalLM.from_pretrained( 13 | MODEL_ID, 14 | torch_dtype=torch.float16, 15 | low_cpu_mem_usage=True, 16 | ) 17 | 18 | with bentoml.models.create(bento_model_tag) as bento_model_ref: 19 | tokenizer.save_pretrained(bento_model_ref.path) 20 | model.save_pretrained(bento_model_ref.path) 21 | 22 | 23 | if __name__ == "__main__": 24 | import_model(MODEL_ID, BENTO_MODEL_TAG) 25 | -------------------------------------------------------------------------------- /examples/bentoml/requirements.txt: -------------------------------------------------------------------------------- 1 | bentoml>=1.2.11 2 | outlines==0.0.37 3 | transformers==4.38.2 4 | datasets==2.18.0 5 | accelerate==0.27.2 6 | -------------------------------------------------------------------------------- /examples/bentoml/service.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | 3 | import bentoml 4 | from import_model import BENTO_MODEL_TAG 5 | 6 | DEFAULT_SCHEMA = """{ 7 | "title": "Character", 8 | "type": "object", 9 | "properties": { 10 | "name": { 11 | "title": "Name", 12 | "maxLength": 10, 13 | "type": "string" 14 | }, 15 | "age": { 16 | "title": "Age", 17 | "type": "integer" 18 | }, 19 | "armor": {"$ref": "#/definitions/Armor"}, 20 | "weapon": {"$ref": "#/definitions/Weapon"}, 21 | "strength": { 22 | "title": "Strength", 23 | "type": "integer" 24 | } 25 | }, 26 | "required": ["name", "age", "armor", "weapon", "strength"], 27 | "definitions": { 28 | "Armor": { 29 | "title": "Armor", 30 | "description": "An enumeration.", 31 | "enum": ["leather", "chainmail", "plate"], 32 | "type": "string" 33 | }, 34 | "Weapon": { 35 | "title": "Weapon", 36 | "description": "An enumeration.", 37 | "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], 38 | "type": "string" 39 | } 40 | } 41 | }""" 42 | 43 | 44 | @bentoml.service( 45 | traffic={ 46 | "timeout": 300, 47 | }, 48 | resources={ 49 | "gpu": 1, 50 | "gpu_type": "nvidia-l4", 51 | }, 52 | ) 53 | class Outlines: 54 | 
bento_model_ref = bentoml.models.get(BENTO_MODEL_TAG) 55 | 56 | def __init__(self) -> None: 57 | import torch 58 | 59 | import outlines 60 | 61 | self.model = outlines.models.transformers( 62 | self.bento_model_ref.path, 63 | device="cuda", 64 | model_kwargs={"torch_dtype": torch.float16}, 65 | ) 66 | 67 | @bentoml.api 68 | async def generate( 69 | self, 70 | prompt: str = "Give me a character description.", 71 | json_schema: t.Optional[str] = DEFAULT_SCHEMA, 72 | ) -> t.Dict[str, t.Any]: 73 | import outlines 74 | 75 | generator = outlines.generate.json(self.model, json_schema) 76 | character = generator(prompt) 77 | 78 | return character 79 | -------------------------------------------------------------------------------- /examples/cerebrium/cerebrium.toml: -------------------------------------------------------------------------------- 1 | [cerebrium.deployment] 2 | name = "cerebrium" 3 | python_version = "3.11" 4 | cuda_version = "12" 5 | include = "[./*, main.py, cerebrium.toml]" 6 | exclude = "[.*]" 7 | shell_commands = [] 8 | 9 | [cerebrium.hardware] 10 | cpu = 2 11 | memory = 14.0 12 | gpu = "AMPERE A10" 13 | gpu_count = 1 14 | provider = "aws" 15 | region = "us-east-1" 16 | 17 | [cerebrium.scaling] 18 | min_replicas = 0 19 | max_replicas = 5 20 | cooldown = 60 21 | 22 | [cerebrium.dependencies.pip] 23 | outline = "==0.0.37" 24 | transformers = "==4.38.2" 25 | datasets = "==2.18.0" 26 | accelerate = "==0.27.2" 27 | -------------------------------------------------------------------------------- /examples/cerebrium/main.py: -------------------------------------------------------------------------------- 1 | import outlines 2 | 3 | model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2") 4 | 5 | schema = { 6 | "title": "Character", 7 | "type": "object", 8 | "properties": { 9 | "name": {"title": "Name", "maxLength": 10, "type": "string"}, 10 | "age": {"title": "Age", "type": "integer"}, 11 | "armor": {"$ref": "#/definitions/Armor"}, 12 | "weapon": {"$ref": "#/definitions/Weapon"}, 13 | "strength": {"title": "Strength", "type": "integer"}, 14 | }, 15 | "required": ["name", "age", "armor", "weapon", "strength"], 16 | "definitions": { 17 | "Armor": { 18 | "title": "Armor", 19 | "description": "An enumeration.", 20 | "enum": ["leather", "chainmail", "plate"], 21 | "type": "string", 22 | }, 23 | "Weapon": { 24 | "title": "Weapon", 25 | "description": "An enumeration.", 26 | "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], 27 | "type": "string", 28 | }, 29 | }, 30 | } 31 | 32 | generator = outlines.generate.json(model, schema) 33 | 34 | 35 | def generate( 36 | prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", 37 | ): 38 | character = generator( 39 | f"[INST]Give me a character description. Describe {prompt}.[/INST]" 40 | ) 41 | 42 | print(character) 43 | return character 44 | -------------------------------------------------------------------------------- /examples/cfg.py: -------------------------------------------------------------------------------- 1 | import outlines.generate as generate 2 | import outlines.models as models 3 | 4 | nlamb_grammar = r""" 5 | start: sentence 6 | 7 | sentence: noun verb noun -> simple 8 | | noun verb "like" noun -> comparative 9 | 10 | noun: adj? 
NOUN 11 | verb: VERB 12 | adj: ADJ 13 | 14 | NOUN: "flies" | "bananas" | "fruit" 15 | VERB: "like" | "flies" 16 | ADJ: "fruit" 17 | 18 | %import common.WS 19 | %ignore WS 20 | """ 21 | 22 | calc_grammar = r""" 23 | ?start: sum 24 | | NAME "=" sum -> assign_var 25 | 26 | ?sum: product 27 | | sum "+" product -> add 28 | | sum "-" product -> sub 29 | 30 | ?product: atom 31 | | product "*" atom -> mul 32 | | product "/" atom -> div 33 | 34 | ?atom: NUMBER -> number 35 | | "-" atom -> neg 36 | | NAME -> var 37 | | "(" sum ")" 38 | 39 | %import common.LETTER -> NAME 40 | %import common.INT -> NUMBER 41 | %import common.WS_INLINE 42 | 43 | %ignore WS_INLINE 44 | """ 45 | 46 | dyck_grammar = r""" 47 | start: s 48 | s: /a+/ 49 | | "(" s ")" 50 | | "{" s "}" 51 | | "[" s "]" 52 | """ 53 | 54 | json_grammar = r""" 55 | ?start: value 56 | 57 | ?value: object 58 | | array 59 | | string 60 | | SIGNED_NUMBER -> number 61 | | "true" -> true 62 | | "false" -> false 63 | | "null" -> null 64 | 65 | array : "[" [value ("," value)*] "]" 66 | object : "{" [pair ("," pair)*] "}" 67 | pair : string ":" value 68 | 69 | inner: /([^"]|\\\")+/ | 70 | string : "\"" inner "\"" 71 | 72 | %import common.SIGNED_NUMBER 73 | %import common.WS 74 | 75 | %ignore WS 76 | """ 77 | 78 | model = models.transformers("hf-internal-testing/tiny-random-gpt2") 79 | batch_size = 10 80 | for grammar in [nlamb_grammar, calc_grammar, dyck_grammar, json_grammar]: 81 | generator = generate.cfg(model, grammar, max_tokens=model.model.config.n_positions) 82 | sequences = generator([" "] * batch_size) 83 | for seq in sequences: 84 | try: 85 | parse = generator.fsm.parser.parse(seq) 86 | assert parse is not None 87 | print("SUCCESS", seq) 88 | except Exception: # will also fail if goes over max_tokens / context window 89 | print("FAILURE", seq) 90 | -------------------------------------------------------------------------------- /examples/llamacpp_example.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from pydantic import BaseModel, constr 4 | 5 | import outlines 6 | 7 | 8 | class Weapon(str, Enum): 9 | sword = "sword" 10 | axe = "axe" 11 | mace = "mace" 12 | spear = "spear" 13 | bow = "bow" 14 | crossbow = "crossbow" 15 | 16 | 17 | class Armor(str, Enum): 18 | leather = "leather" 19 | chainmail = "chainmail" 20 | plate = "plate" 21 | 22 | 23 | class Character(BaseModel): 24 | name: constr(max_length=10) 25 | age: int 26 | armor: Armor 27 | weapon: Weapon 28 | strength: int 29 | 30 | 31 | if __name__ == "__main__": 32 | # curl -L -o mistral-7b-instruct-v0.2.Q5_K_M.gguf https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.gguf 33 | model = outlines.models.llamacpp("./mistral-7b-instruct-v0.2.Q5_K_M.gguf") 34 | 35 | # Construct structured sequence generator 36 | generator = outlines.generate.json(model, Character) 37 | 38 | # Draw a sample 39 | seed = 789005 40 | 41 | prompt = "Instruct: You are a leading role play gamer. You have seen thousands of different characters and their attributes.\nPlease return a JSON object with common attributes of an RPG character. 
Give me a character description\nOutput:" 42 | 43 | sequence = generator(prompt, seed=seed, max_tokens=512) 44 | print(sequence) 45 | -------------------------------------------------------------------------------- /examples/llamacpp_processor.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from llama_cpp import Llama, LogitsProcessorList 4 | from pydantic import BaseModel, constr 5 | 6 | from outlines.generate.processors import JSONLogitsProcessor 7 | from outlines.models.llamacpp import LlamaCppTokenizer 8 | 9 | 10 | class Weapon(str, Enum): 11 | sword = "sword" 12 | axe = "axe" 13 | mace = "mace" 14 | spear = "spear" 15 | bow = "bow" 16 | crossbow = "crossbow" 17 | 18 | 19 | class Armor(str, Enum): 20 | leather = "leather" 21 | chainmail = "chainmail" 22 | plate = "plate" 23 | 24 | 25 | class Character(BaseModel): 26 | name: constr(max_length=10) 27 | age: int 28 | armor: Armor 29 | weapon: Weapon 30 | strength: int 31 | 32 | 33 | if __name__ == "__main__": 34 | llama = Llama("./phi-2.Q4_K_M.gguf") 35 | tokenizer = LlamaCppTokenizer(llama) 36 | 37 | prompt = "Instruct: You are a leading role play gamer. You have seen thousands of different characters and their attributes.\nPlease return a JSON object with common attributes of an RPG character. Give me a character description\nOutput:" 38 | 39 | logits_processor = JSONLogitsProcessor(Character, tokenizer) 40 | 41 | json_str = llama.create_completion( 42 | prompt, 43 | top_k=40, 44 | top_p=0.95, 45 | temperature=0.7, 46 | max_tokens=100, 47 | logits_processor=LogitsProcessorList([logits_processor]), 48 | )["choices"][0]["text"] 49 | 50 | print(json_str) 51 | -------------------------------------------------------------------------------- /examples/math_generate_code.py: -------------------------------------------------------------------------------- 1 | """Example from https://dust.tt/spolu/a/d12ac33169""" 2 | 3 | import outlines 4 | import outlines.models as models 5 | from outlines import Template 6 | 7 | examples = [ 8 | {"question": "What is 37593 * 67?", "code": "37593 * 67"}, 9 | { 10 | "question": "Janet's ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?", 11 | "code": "(16-3-4)*2", 12 | }, 13 | { 14 | "question": "A robe takes 2 bolts of blue fiber and half that much white fiber. How many bolts in total does it take?", 15 | "code": " 2 + 2/2", 16 | }, 17 | ] 18 | 19 | question = "Carla is downloading a 200 GB file. She can download 2 GB/minute, but 40% of the way through the download, the download fails. Then Carla has to restart the download from the beginning. How load did it take her to download the file in minutes?" 
20 | 21 | answer_with_code_prompt = Template.from_string( 22 | """ 23 | {% for example in examples %} 24 | QUESTION: {{example.question}} 25 | CODE: {{example.code}} 26 | 27 | {% endfor %} 28 | QUESTION: {{question}} 29 | CODE:""" 30 | ) 31 | 32 | 33 | def execute_code(code): 34 | result = eval(code) 35 | return result 36 | 37 | 38 | prompt = answer_with_code_prompt(question, examples) 39 | model = models.openai("gpt-4o-mini") 40 | answer = outlines.generate.text(model)(prompt) 41 | result = execute_code(answer) 42 | print(f"It takes Carla {result:.0f} minutes to download the file.") 43 | -------------------------------------------------------------------------------- /examples/modal_example.py: -------------------------------------------------------------------------------- 1 | import modal 2 | 3 | app = modal.App(name="outlines-app") 4 | 5 | 6 | outlines_image = modal.Image.debian_slim(python_version="3.11").pip_install( 7 | "outlines==0.0.37", 8 | "transformers==4.38.2", 9 | "datasets==2.18.0", 10 | "accelerate==0.27.2", 11 | ) 12 | 13 | 14 | def import_model(): 15 | import outlines 16 | 17 | outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2") 18 | 19 | 20 | outlines_image = outlines_image.run_function(import_model) 21 | 22 | 23 | schema = """{ 24 | "title": "Character", 25 | "type": "object", 26 | "properties": { 27 | "name": { 28 | "title": "Name", 29 | "maxLength": 10, 30 | "type": "string" 31 | }, 32 | "age": { 33 | "title": "Age", 34 | "type": "integer" 35 | }, 36 | "armor": {"$ref": "#/definitions/Armor"}, 37 | "weapon": {"$ref": "#/definitions/Weapon"}, 38 | "strength": { 39 | "title": "Strength", 40 | "type": "integer" 41 | } 42 | }, 43 | "required": ["name", "age", "armor", "weapon", "strength"], 44 | "definitions": { 45 | "Armor": { 46 | "title": "Armor", 47 | "description": "An enumeration.", 48 | "enum": ["leather", "chainmail", "plate"], 49 | "type": "string" 50 | }, 51 | "Weapon": { 52 | "title": "Weapon", 53 | "description": "An enumeration.", 54 | "enum": ["sword", "axe", "mace", "spear", "bow", "crossbow"], 55 | "type": "string" 56 | } 57 | } 58 | }""" 59 | 60 | 61 | @app.function(image=outlines_image, gpu=modal.gpu.A100(memory=80)) 62 | def generate( 63 | prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", 64 | ): 65 | import outlines 66 | 67 | model = outlines.models.transformers("mistralai/Mistral-7B-v0.1", device="cuda") 68 | 69 | generator = outlines.generate.json(model, schema) 70 | character = generator( 71 | f"[INST]Give me a character description. 
Describe {prompt}.[/INST]" 72 | ) 73 | 74 | print(character) 75 | 76 | 77 | @app.local_entrypoint() 78 | def main( 79 | prompt: str = "Amiri, a 53 year old warrior woman with a sword and leather armor.", 80 | ): 81 | generate.remote(prompt) 82 | -------------------------------------------------------------------------------- /examples/parsing.py: -------------------------------------------------------------------------------- 1 | """An example illustrating parser-based masking.""" 2 | 3 | import math 4 | import time 5 | from copy import copy 6 | 7 | import torch 8 | from lark.indenter import DedentError 9 | from lark.lexer import UnexpectedCharacters, UnexpectedToken 10 | from transformers import ( 11 | AutoModelForCausalLM, 12 | AutoTokenizer, 13 | LogitsProcessor, 14 | LogitsProcessorList, 15 | set_seed, 16 | ) 17 | 18 | from outlines.fsm.parsing import PartialLark, PartialPythonIndenter 19 | 20 | revision = None 21 | checkpoint = "Salesforce/codegen-350M-mono" 22 | device = "cuda" 23 | 24 | tokenizer = AutoTokenizer.from_pretrained(checkpoint) 25 | 26 | model = AutoModelForCausalLM.from_pretrained( 27 | checkpoint, trust_remote_code=True, revision=revision 28 | ).to(device) 29 | 30 | parser = PartialLark.open_from_package( 31 | "tests", 32 | "partial_python.lark", 33 | ["text"], 34 | parser="lalr", 35 | postlex=PartialPythonIndenter(), 36 | start="file_input", 37 | ) 38 | 39 | 40 | class ParserLogitsProcessor(LogitsProcessor): 41 | """Bias invalid token scores according to a running parse state.""" 42 | 43 | def __init__(self, parser): 44 | self.parser = parser 45 | self.parser_state = parser.parse("") 46 | self.states_stack = [self.parser_state] 47 | self.token_seq = None 48 | self.token_idx = 0 49 | 50 | def __call__( 51 | self, input_ids: torch.LongTensor, scores: torch.FloatTensor 52 | ) -> torch.FloatTensor: 53 | if self.token_seq is None: 54 | self.token_seq = tokenizer.decode(input_ids[0]) 55 | self.token_idx = len(input_ids[0]) - 1 56 | else: 57 | self.token_idx += 1 58 | self.token_seq += tokenizer.decode(input_ids[0][self.token_idx]) 59 | 60 | # Process the last sampled token 61 | lex_state = self.parser_state.lexer.state 62 | lex_state.text = self.token_seq 63 | 64 | self.parser.parse_from_state(self.parser_state, is_end=False) 65 | 66 | print(f'parsed:"{self.token_seq}"') 67 | 68 | mask = torch.full_like(scores, -math.inf) 69 | 70 | # Determine which tokens in the vocabulary are valid next tokens 71 | # given the parser state. 72 | # 73 | # TODO: This is a very naive and slow approach. It could be done in 74 | # parallel, easily memoized/cached, etc., but there are a few other 75 | # approaches to try first that will dramatically reduce the 76 | # amount of work needed here. 
77 | t0 = time.perf_counter() 78 | for test_token, token_id in tokenizer.vocab.items(): 79 | ps = copy(self.parser_state) 80 | ls = ps.lexer.state 81 | ls.text = self.token_seq + tokenizer.convert_tokens_to_string([test_token]) 82 | 83 | try: 84 | self.parser.parse_from_state(ps, is_end=False) 85 | mask[0][token_id] = 0 86 | except (EOFError, UnexpectedToken, UnexpectedCharacters, DedentError): 87 | pass 88 | 89 | print(f"next token masking duration: {time.perf_counter() - t0}") 90 | 91 | return scores + mask 92 | 93 | 94 | set_seed(20399) 95 | 96 | input_text = "def " 97 | inputs = tokenizer.encode(input_text, return_tensors="pt").to(device) 98 | 99 | outputs = model.generate( 100 | inputs, 101 | max_length=100, 102 | temperature=0.1, 103 | logits_processor=LogitsProcessorList([ParserLogitsProcessor(parser)]), 104 | renormalize_logits=True, 105 | ) 106 | 107 | print(tokenizer.decode(outputs[0])) 108 | -------------------------------------------------------------------------------- /examples/pick_odd_one_out.py: -------------------------------------------------------------------------------- 1 | """Chain-of-thought prompting for Odd one out classification. 2 | 3 | Example taken from the LQML library [1]_. 4 | 5 | References 6 | ---------- 7 | .. [1] Beurer-Kellner, L., Fischer, M., & Vechev, M. (2022). 8 | Prompting Is Programming: A Query Language For Large Language Models. 9 | arXiv preprint arXiv:2212.06094. 10 | 11 | """ 12 | 13 | import outlines 14 | import outlines.models as models 15 | 16 | 17 | build_ooo_prompt = outlines.Template.from_string( 18 | """ 19 | Pick the odd word out: skirt, dress, pen, jacket. 20 | skirt is clothing, dress is clothing, pen is an object, jacket is clothing. 21 | So the odd one is pen. 22 | 23 | Pick the odd word out: Spain, France, German, England, Singapore. 24 | Spain is a country, France is a country, German is a language, ... 25 | So the odd one is German. 26 | 27 | Pick the odd word out: {{ options | join(", ") }}. 28 | 29 | """ 30 | ) 31 | 32 | options = ["sea", "mountains", "plains", "sock"] 33 | 34 | model = models.openai("gpt-4o-mini") 35 | gen_text = outlines.generate.text(model) 36 | gen_choice = outlines.generate.choice(model, options) 37 | 38 | prompt = build_ooo_prompt(options) 39 | reasoning = gen_text(prompt, stop_at=["Pick the odd word", "So the odd one"]) 40 | prompt += reasoning 41 | result = gen_choice(prompt) 42 | prompt += result 43 | print(result) 44 | -------------------------------------------------------------------------------- /examples/react.py: -------------------------------------------------------------------------------- 1 | """ReAct 2 | 3 | This example was inspired by the LQML library [1]_. The ReAct framework was 4 | first developed in [2]_ and augments Chain-of-Thought prompting with the ability 5 | for the model to query external sources. 6 | 7 | References 8 | ---------- 9 | .. [1] Beurer-Kellner, L., Fischer, M., & Vechev, M. (2022). Prompting Is Programming: A Query Language For Large Language Models. arXiv preprint arXiv:2212.06094. 10 | .. [2] Yao, S., Zhao, J., Yu, D., Du, N., Shafran, I., Narasimhan, K., & Cao, Y. (2022). React: Synergizing reasoning and acting in language models. arXiv preprint arXiv:2210.03629. 
11 | 12 | """ 13 | 14 | import requests # type: ignore 15 | 16 | import outlines 17 | from outlines import Template 18 | import outlines.generate as generate 19 | import outlines.models as models 20 | 21 | 22 | build_reAct_prompt = Template.from_string( 23 | """What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into? 24 | Tho 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado ... 25 | Act 2: Search 'Colorado orogeny' 26 | Obs 2: The Colorado orogeny was an episode of mountain building (an orogeny) ... 27 | Tho 3: It does not mention the eastern sector. So I need to look up eastern sector. 28 | ... 29 | Tho 4: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft. 30 | Act 5: Finish '1,800 to 7,000 ft' 31 | {{ question }} 32 | """ 33 | ) 34 | 35 | 36 | add_mode = Template.from_string( 37 | """{{ prompt }} 38 | {{ mode }} {{ i }}: {{ result }} 39 | """ 40 | ) 41 | 42 | 43 | def search_wikipedia(query: str): 44 | url = f"https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exintro&explaintext&redirects=1&titles={query}&origin=*" 45 | response = requests.get(url) 46 | page = response.json()["query"]["pages"] 47 | return ".".join(list(page.values())[0]["extract"].split(".")[:2]) 48 | 49 | 50 | prompt = build_reAct_prompt("Where is Apple Computers headquarted? ") 51 | model = models.openai("gpt-4o-mini") 52 | 53 | mode_generator = generate.choice(model, choices=["Tho", "Act"]) 54 | action_generator = generate.choice(model, choices=["Search", "Finish"]) 55 | text_generator = generate.text(model) 56 | 57 | for i in range(1, 10): 58 | mode = mode_generator(prompt, max_tokens=128) 59 | prompt = add_mode(i, mode, "", prompt) 60 | 61 | if mode == "Tho": 62 | thought = text_generator(prompt, stop_at="\n", max_tokens=128) 63 | prompt += f"{thought}" 64 | elif mode == "Act": 65 | action = action_generator(prompt, max_tokens=128) 66 | prompt += f"{action} '" 67 | 68 | subject = text_generator(prompt, stop_at=["'"], max_tokens=128) 69 | # Apple Computers headquartered 70 | subject = " ".join(subject.split()[:2]) 71 | prompt += f"{subject}'" 72 | 73 | if action == "Search": 74 | result = search_wikipedia(subject) 75 | prompt = add_mode(i, "Obs", result, prompt) 76 | else: 77 | break 78 | 79 | print(prompt) 80 | -------------------------------------------------------------------------------- /examples/self_consistency.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import numpy as np 4 | 5 | import outlines 6 | import outlines.models as models 7 | from outlines import Template 8 | 9 | examples = [ 10 | { 11 | "question": "There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?", 12 | "answer": "We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.", 13 | }, 14 | { 15 | "question": "If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?", 16 | "answer": "There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.", 17 | }, 18 | { 19 | "question": "Leah had 32 chocolates and her sister had 42. 
If they ate 35, how many pieces do they have left in total?", 20 | "answer": "Leah had 32 chocolates and Leah’s sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.", 21 | }, 22 | { 23 | "question": "Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?", 24 | "answer": "Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.", 25 | }, 26 | { 27 | "question": "Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?", 28 | "answer": "He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.", 29 | }, 30 | { 31 | "question": "There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?", 32 | "answer": "There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.", 33 | }, 34 | { 35 | "question": "Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?", 36 | "answer": "Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.", 37 | }, 38 | { 39 | "question": "Olivia has $23. She bought five bagels for $3 each. How much money does she have left?", 40 | "answer": "She bought 5 bagels for $3 each. This means she spent 5", 41 | }, 42 | ] 43 | 44 | question = "When I was 6 my sister was half my age. Now I’m 70 how old is my sister?" 
45 | 46 | 47 | few_shots = Template.from_string( 48 | """ 49 | {% for example in examples %} 50 | Q: {{ example.question }} 51 | A: {{ example.answer }} 52 | {% endfor %} 53 | Q: {{ question }} 54 | A: 55 | """ 56 | ) 57 | 58 | model = models.openai("gpt-4o-mini") 59 | generator = outlines.generate.text(model) 60 | prompt = few_shots(question, examples) 61 | answers = generator(prompt, samples=10) 62 | 63 | digits = [] 64 | for answer in answers: 65 | try: 66 | match = re.findall(r"\d+", answer)[-1] 67 | if match is not None: 68 | digit = int(match) 69 | digits.append(digit) 70 | except AttributeError: 71 | print(f"Could not parse the completion: '{answer}'") 72 | 73 | unique_digits, counts = np.unique(digits, return_counts=True) 74 | results = {d: c for d, c in zip(unique_digits, counts)} 75 | print(results) 76 | 77 | max_count = max(results.values()) 78 | answer_value = [key for key, value in results.items() if value == max_count][0] 79 | total_count = sum(results.values()) 80 | print( 81 | f"The most likely answer is {answer_value} ({max_count / total_count * 100}% consensus)" 82 | ) 83 | -------------------------------------------------------------------------------- /examples/transformers_integration.py: -------------------------------------------------------------------------------- 1 | """Example of integrating `outlines` with `transformers`.""" 2 | 3 | from pydantic import BaseModel 4 | from transformers import pipeline 5 | 6 | from outlines.integrations.transformers import JSONPrefixAllowedTokens 7 | 8 | 9 | class Person(BaseModel): 10 | first_name: str 11 | surname: str 12 | 13 | 14 | pipe = pipeline("text-generation", model="mistralai/Mistral-7B-v0.1") 15 | prefix_allowed_tokens_fn = JSONPrefixAllowedTokens( 16 | schema=Person, tokenizer_or_pipe=pipe, whitespace_pattern=r" ?" 17 | ) 18 | results = pipe( 19 | ["He is Tom Jones", "She saw Linda Smith"], 20 | return_full_text=False, 21 | do_sample=False, 22 | max_new_tokens=50, 23 | prefix_allowed_tokens_fn=prefix_allowed_tokens_fn, 24 | ) 25 | print(results) 26 | -------------------------------------------------------------------------------- /examples/vllm_integration.py: -------------------------------------------------------------------------------- 1 | """Example of integrating `outlines` with `vllm`.""" 2 | 3 | import vllm 4 | from pydantic import BaseModel 5 | from transformers import AutoTokenizer 6 | 7 | from outlines.models.vllm import adapt_tokenizer 8 | from outlines.processors import JSONLogitsProcessor 9 | 10 | 11 | class Person(BaseModel): 12 | first_name: str 13 | surname: str 14 | 15 | 16 | MODEL_ID = "mistralai/Mistral-7B-v0.1" 17 | llm = vllm.LLM(model=MODEL_ID, max_model_len=512) 18 | tokenizer = adapt_tokenizer(AutoTokenizer.from_pretrained(MODEL_ID)) 19 | logits_processor = JSONLogitsProcessor( 20 | schema=Person, tokenizer=tokenizer, whitespace_pattern=r" ?" 
21 | ) 22 | result = llm.generate( 23 | ["He is Tom Jones", "She saw Linda Smith"], 24 | sampling_params=vllm.SamplingParams( 25 | temperature=0.0, 26 | max_tokens=50, 27 | logits_processors=[logits_processor], 28 | ), 29 | ) 30 | print(result) 31 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "inputs": { 5 | "systems": "systems" 6 | }, 7 | "locked": { 8 | "lastModified": 1731533236, 9 | "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", 10 | "owner": "numtide", 11 | "repo": "flake-utils", 12 | "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", 13 | "type": "github" 14 | }, 15 | "original": { 16 | "owner": "numtide", 17 | "repo": "flake-utils", 18 | "type": "github" 19 | } 20 | }, 21 | "nixpkgs": { 22 | "locked": { 23 | "lastModified": 1738136902, 24 | "narHash": "sha256-pUvLijVGARw4u793APze3j6mU1Zwdtz7hGkGGkD87qw=", 25 | "owner": "NixOS", 26 | "repo": "nixpkgs", 27 | "rev": "9a5db3142ce450045840cc8d832b13b8a2018e0c", 28 | "type": "github" 29 | }, 30 | "original": { 31 | "id": "nixpkgs", 32 | "type": "indirect" 33 | } 34 | }, 35 | "root": { 36 | "inputs": { 37 | "flake-utils": "flake-utils", 38 | "nixpkgs": "nixpkgs" 39 | } 40 | }, 41 | "systems": { 42 | "locked": { 43 | "lastModified": 1681028828, 44 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 45 | "owner": "nix-systems", 46 | "repo": "default", 47 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 48 | "type": "github" 49 | }, 50 | "original": { 51 | "owner": "nix-systems", 52 | "repo": "default", 53 | "type": "github" 54 | } 55 | } 56 | }, 57 | "root": "root", 58 | "version": 7 59 | } 60 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | inputs.flake-utils.url = "github:numtide/flake-utils"; 3 | outputs = { self, nixpkgs, flake-utils }: 4 | flake-utils.lib.eachDefaultSystem (system: 5 | let 6 | pkgs = import nixpkgs { 7 | inherit system; 8 | config.allowUnfree = true; 9 | }; 10 | in { devShells.default = import ./shell.nix { inherit pkgs; }; }); 11 | } 12 | -------------------------------------------------------------------------------- /outlines/__init__.py: -------------------------------------------------------------------------------- 1 | """Outlines is a Generative Model Programming Framework.""" 2 | 3 | import outlines.generate 4 | import outlines.grammars 5 | import outlines.models 6 | import outlines.processors 7 | import outlines.types 8 | from outlines.base import vectorize 9 | from outlines.caching import clear_cache, disable_cache, get_cache 10 | from outlines.function import Function 11 | from outlines.templates import Template, prompt 12 | 13 | __all__ = [ 14 | "clear_cache", 15 | "disable_cache", 16 | "get_cache", 17 | "Function", 18 | "prompt", 19 | "Prompt", 20 | "vectorize", 21 | "grammars", 22 | ] 23 | -------------------------------------------------------------------------------- /outlines/fsm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/outlines/fsm/__init__.py -------------------------------------------------------------------------------- /outlines/fsm/json_schema.py: 
-------------------------------------------------------------------------------- 1 | import inspect 2 | import json 3 | import warnings 4 | from enum import Enum 5 | from typing import Callable, Type, Union 6 | 7 | from pydantic import BaseModel, create_model 8 | 9 | 10 | def convert_json_schema_to_str(json_schema: Union[dict, str, Type[BaseModel]]) -> str: 11 | """Convert a JSON schema to a string. 12 | 13 | Parameters 14 | ---------- 15 | json_schema 16 | The JSON schema. 17 | 18 | Returns 19 | ------- 20 | str 21 | The JSON schema converted to a string. 22 | 23 | Raises 24 | ------ 25 | ValueError 26 | If the schema is not a dictionary, a string or a Pydantic class. 27 | """ 28 | if isinstance(json_schema, dict): 29 | schema_str = json.dumps(json_schema) 30 | elif isinstance(json_schema, str): 31 | schema_str = json_schema 32 | elif issubclass(json_schema, BaseModel): 33 | schema_str = json.dumps(json_schema.model_json_schema()) 34 | else: 35 | raise ValueError( 36 | f"Cannot parse schema {json_schema}. The schema must be either " 37 | + "a Pydantic class, a dictionary or a string that contains the JSON " 38 | + "schema specification" 39 | ) 40 | return schema_str 41 | 42 | 43 | def get_schema_from_signature(fn: Callable) -> dict: 44 | """Turn a function signature into a JSON schema. 45 | 46 | Every JSON object valid to the output JSON Schema can be passed 47 | to `fn` using the ** unpacking syntax. 48 | 49 | """ 50 | signature = inspect.signature(fn) 51 | arguments = {} 52 | for name, arg in signature.parameters.items(): 53 | if arg.annotation == inspect._empty: 54 | raise ValueError("Each argument must have a type annotation") 55 | else: 56 | arguments[name] = (arg.annotation, ...) 57 | 58 | try: 59 | fn_name = fn.__name__ 60 | except Exception as e: 61 | fn_name = "Arguments" 62 | warnings.warn( 63 | f"The function name could not be determined. Using default name 'Arguments' instead. For debugging, here is exact error:\n{e}", 64 | category=UserWarning, 65 | ) 66 | model = create_model(fn_name, **arguments) 67 | 68 | return model.model_json_schema() 69 | 70 | 71 | def get_schema_from_enum(myenum: type[Enum]) -> dict: 72 | if len(myenum) == 0: 73 | raise ValueError( 74 | f"Your enum class {myenum.__name__} has 0 members. If you are working with an enum of functions, do not forget to register them as callable (using `partial` for instance)" 75 | ) 76 | choices = [ 77 | get_schema_from_signature(elt.value.func) 78 | if callable(elt.value) 79 | else {"const": elt.value} 80 | for elt in myenum 81 | ] 82 | schema = {"title": myenum.__name__, "oneOf": choices} 83 | return schema 84 | -------------------------------------------------------------------------------- /outlines/fsm/types.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from enum import EnumMeta 3 | from typing import Any, Protocol, Tuple, Type 4 | 5 | from outlines.types import Regex, boolean as boolean_regex, date as date_regex 6 | from outlines.types import datetime as datetime_regex 7 | from outlines.types import ( 8 | integer as integer_regex, 9 | number as number_regex, 10 | time as time_regex, 11 | ) 12 | 13 | 14 | class FormatFunction(Protocol): 15 | def __call__(self, sequence: str) -> Any: ... 
16 | 17 | 18 | def python_types_to_regex(python_type: Type) -> Tuple[Regex, FormatFunction]: 19 | # If it is a custom type 20 | if isinstance(python_type, Regex): 21 | custom_regex_str = python_type.pattern 22 | 23 | def custom_format_fn(sequence: str) -> str: 24 | return str(sequence) 25 | 26 | return Regex(custom_regex_str), custom_format_fn 27 | 28 | if isinstance(python_type, EnumMeta): 29 | values = python_type.__members__.keys() 30 | enum_regex_str: str = "(" + "|".join(values) + ")" 31 | 32 | def enum_format_fn(sequence: str) -> str: 33 | return str(sequence) 34 | 35 | return Regex(enum_regex_str), enum_format_fn 36 | 37 | if python_type is float: 38 | 39 | def float_format_fn(sequence: str) -> float: 40 | return float(sequence) 41 | 42 | return number_regex, float_format_fn 43 | elif python_type is int: 44 | 45 | def int_format_fn(sequence: str) -> int: 46 | return int(sequence) 47 | 48 | return integer_regex, int_format_fn 49 | elif python_type is bool: 50 | 51 | def bool_format_fn(sequence: str) -> bool: 52 | return bool(sequence) 53 | 54 | return boolean_regex, bool_format_fn 55 | elif python_type == datetime.date: 56 | 57 | def date_format_fn(sequence: str) -> datetime.date: 58 | return datetime.datetime.strptime(sequence, "%Y-%m-%d").date() 59 | 60 | return date_regex, date_format_fn 61 | elif python_type == datetime.time: 62 | 63 | def time_format_fn(sequence: str) -> datetime.time: 64 | return datetime.datetime.strptime(sequence, "%H:%M:%S").time() 65 | 66 | return time_regex, time_format_fn 67 | elif python_type == datetime.datetime: 68 | 69 | def datetime_format_fn(sequence: str) -> datetime.datetime: 70 | return datetime.datetime.strptime(sequence, "%Y-%m-%d %H:%M:%S") 71 | 72 | return datetime_regex, datetime_format_fn 73 | else: 74 | raise NotImplementedError( 75 | f"The Python type {python_type} is not supported. Please open an issue." 76 | ) 77 | -------------------------------------------------------------------------------- /outlines/function.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | from dataclasses import dataclass 3 | from typing import TYPE_CHECKING, Callable, Optional, Tuple, Union 4 | 5 | import requests 6 | 7 | from outlines import generate, models 8 | 9 | if TYPE_CHECKING: 10 | from outlines.generate.api import SequenceGenerator 11 | from outlines.templates import Template 12 | 13 | 14 | @dataclass 15 | class Function: 16 | """Represents an Outlines function. 17 | 18 | Functions are a convenient way to encapsulate a prompt template, a language 19 | model and a Pydantic model that define the output structure. Once defined, 20 | the function can be called with arguments that will be used to render the 21 | prompt template. 22 | 23 | """ 24 | 25 | prompt_template: "Template" 26 | schema: Union[str, Callable, object] 27 | model_name: str 28 | generator: Optional["SequenceGenerator"] = None 29 | 30 | @classmethod 31 | def from_github(cls, program_path: str, function_name: str = "fn"): 32 | """Load a function stored on GitHub""" 33 | program_content = download_from_github(program_path) 34 | function = extract_function_from_file(program_content, function_name) 35 | 36 | return function 37 | 38 | def init_generator(self): 39 | """Load the model and initialize the generator.""" 40 | model = models.transformers(self.model_name) 41 | self.generator = generate.json(model, self.schema) 42 | 43 | def __call__(self, *args, **kwargs): 44 | """Call the function. 45 | 46 | .. 
warning:: 47 | 48 | This currently does not support batching. 49 | 50 | Parameters 51 | ---------- 52 | args 53 | Values to pass to the prompt template as positional arguments. 54 | kwargs 55 | Values to pass to the prompt template as keyword arguments. 56 | 57 | """ 58 | if self.generator is None: 59 | self.init_generator() 60 | 61 | prompt = self.prompt_template(*args, **kwargs) 62 | return self.generator(prompt) 63 | 64 | 65 | def download_from_github(short_path: str): 66 | """Download the file in which the function is stored on GitHub.""" 67 | GITHUB_BASE_URL = "https://raw.githubusercontent.com" 68 | BRANCH = "main" 69 | 70 | path = short_path.split("/") 71 | if len(path) < 3: 72 | raise ValueError( 73 | "Please provide a valid path in the form {USERNAME}/{REPO_NAME}/{PATH_TO_FILE}." 74 | ) 75 | elif short_path[-3:] == ".py": 76 | raise ValueError("Do not append the `.py` extension to the program name.") 77 | 78 | username = path[0] 79 | repo = path[1] 80 | path_to_file = path[2:] 81 | 82 | url = "/".join([GITHUB_BASE_URL, username, repo, BRANCH] + path_to_file) + ".py" 83 | result = requests.get(url) 84 | 85 | if result.status_code == 200: 86 | return result.text 87 | elif result.status_code == 404: 88 | raise ValueError( 89 | f"Program could not be found at {url}. Please make sure you entered the GitHub username, repository name and path to the program correctly." 90 | ) 91 | else: 92 | result.raise_for_status() 93 | 94 | 95 | def extract_function_from_file(content: str, function_name: str) -> Tuple[Callable]: 96 | """Extract a function object from a downloaded file.""" 97 | 98 | spec = importlib.util.spec_from_loader( 99 | "outlines_function", loader=None, origin="github" 100 | ) 101 | if spec is not None: 102 | module = importlib.util.module_from_spec(spec) 103 | exec(content, module.__dict__) 104 | 105 | try: 106 | fn = getattr(module, function_name) 107 | except AttributeError: 108 | raise AttributeError( 109 | "Could not find an `outlines.Function` instance in the remote file. Make sure that the path you specified is correct." 110 | ) 111 | 112 | if not isinstance(fn, module.outlines.Function): 113 | raise TypeError( 114 | f"The `{function_name}` variable in the program must be an instance of `outlines.Function`" 115 | ) 116 | 117 | return fn 118 | -------------------------------------------------------------------------------- /outlines/generate/__init__.py: -------------------------------------------------------------------------------- 1 | from .api import SequenceGenerator 2 | from .cfg import cfg 3 | from .choice import choice 4 | from .format import format 5 | from .fsm import fsm 6 | from .json import json 7 | from .regex import regex 8 | from .text import text 9 | -------------------------------------------------------------------------------- /outlines/generate/cfg.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | from outlines.generate.api import ( 4 | SequenceGeneratorAdapter, 5 | VisionSequenceGeneratorAdapter, 6 | ) 7 | from outlines.models import LlamaCpp, OpenAI, TransformersVision 8 | from outlines.samplers import Sampler, multinomial 9 | 10 | 11 | @singledispatch 12 | def cfg( 13 | model, cfg_str: str, sampler: Sampler = multinomial() 14 | ) -> SequenceGeneratorAdapter: 15 | """Generate text in the language of a Context-Free Grammar 16 | 17 | Arguments 18 | --------- 19 | model: 20 | An `outlines.model` instance. 
21 | sampler: 22 | The sampling algorithm to use to generate token ids from the logits 23 | distribution. 24 | 25 | Returns 26 | ------- 27 | A `SequenceGeneratorAdapter` instance that generates text. 28 | 29 | """ 30 | from outlines.processors import CFGLogitsProcessor 31 | 32 | logits_processor = CFGLogitsProcessor(cfg_str, tokenizer=model.tokenizer) 33 | return SequenceGeneratorAdapter(model, logits_processor, sampler) 34 | 35 | 36 | @cfg.register(TransformersVision) 37 | def cfg_vision(model, cfg_str: str, sampler: Sampler = multinomial()): 38 | from outlines.processors import CFGLogitsProcessor 39 | 40 | logits_processor = CFGLogitsProcessor(cfg_str, tokenizer=model.tokenizer) 41 | return VisionSequenceGeneratorAdapter(model, logits_processor, sampler) 42 | 43 | 44 | @cfg.register(LlamaCpp) 45 | def cfg_llamacpp(model, cfg_str: str, sampler: Sampler = multinomial()): 46 | raise NotImplementedError("Not yet available due to bug in llama_cpp tokenizer") 47 | 48 | 49 | @cfg.register(OpenAI) 50 | def cfg_openai(model, cfg_str: str, sampler: Sampler = multinomial()): 51 | raise NotImplementedError( 52 | "Cannot use grammar-structured generation with an OpenAI model" 53 | + "due to the limitations of the OpenAI API." 54 | ) 55 | -------------------------------------------------------------------------------- /outlines/generate/choice.py: -------------------------------------------------------------------------------- 1 | import json as pyjson 2 | import re 3 | from enum import Enum 4 | from functools import singledispatch 5 | from typing import Callable, List, Union 6 | 7 | from outlines_core.fsm.json_schema import build_regex_from_schema 8 | 9 | from outlines.fsm.json_schema import get_schema_from_enum 10 | from outlines.generate.api import SequenceGeneratorAdapter 11 | from outlines.models import OpenAI 12 | from outlines.samplers import Sampler, multinomial 13 | 14 | from .json import json 15 | from .regex import regex 16 | 17 | 18 | @singledispatch 19 | def choice( 20 | model, choices: Union[List[str], type[Enum]], sampler: Sampler = multinomial() 21 | ) -> SequenceGeneratorAdapter: 22 | if isinstance(choices, type(Enum)): 23 | regex_str = build_regex_from_schema(pyjson.dumps(get_schema_from_enum(choices))) 24 | else: 25 | choices = [re.escape(choice) for choice in choices] # type: ignore 26 | regex_str = r"(" + r"|".join(choices) + r")" 27 | 28 | generator = regex(model, regex_str, sampler) 29 | if isinstance(choices, type(Enum)): 30 | generator.format_sequence = lambda x: pyjson.loads(x) 31 | else: 32 | generator.format_sequence = lambda x: x 33 | 34 | return generator 35 | 36 | 37 | @choice.register(OpenAI) 38 | def choice_openai( 39 | model: OpenAI, choices: List[str], sampler: Sampler = multinomial() 40 | ) -> Callable: 41 | """ 42 | Call OpenAI API with response_format of a dict: 43 | {"result": } 44 | """ 45 | 46 | choices_schema = pyjson.dumps( 47 | { 48 | "type": "object", 49 | "properties": {"result": {"type": "string", "enum": choices}}, 50 | "additionalProperties": False, 51 | "required": ["result"], 52 | } 53 | ) 54 | generator = json(model, choices_schema, sampler) 55 | 56 | def generate_choice(*args, **kwargs): 57 | return generator(*args, **kwargs)["result"] 58 | 59 | return generate_choice 60 | -------------------------------------------------------------------------------- /outlines/generate/format.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | from outlines.fsm.types import 
python_types_to_regex 4 | from outlines.generate.api import SequenceGeneratorAdapter 5 | from outlines.models import OpenAI 6 | from outlines.samplers import Sampler, multinomial 7 | 8 | from .regex import regex 9 | 10 | 11 | @singledispatch 12 | def format( 13 | model, python_type, sampler: Sampler = multinomial() 14 | ) -> SequenceGeneratorAdapter: 15 | """Generate structured data that can be parsed as a Python type. 16 | 17 | Parameters 18 | ---------- 19 | model: 20 | An instance of `Transformer` that represents a model from the 21 | `transformers` library. 22 | python_type: 23 | A Python type. The output of the generator must be parseable into 24 | this type. 25 | sampler: 26 | The sampling algorithm to use to generate token ids from the logits 27 | distribution. 28 | 29 | Returns 30 | ------- 31 | A `SequenceGenerator` instance that generates text constrained by the Python type 32 | and translates this text into the corresponding type. 33 | 34 | """ 35 | regex_str, format_fn = python_types_to_regex(python_type) 36 | regex_str = regex_str.pattern 37 | generator = regex(model, regex_str, sampler) 38 | generator.format_sequence = format_fn 39 | 40 | return generator 41 | 42 | 43 | @format.register(OpenAI) 44 | def format_openai(model, python_type, sampler: Sampler = multinomial()): 45 | raise NotImplementedError( 46 | "Cannot use Python type-structured generation with an OpenAI model" 47 | + " due to the limitations of the OpenAI API." 48 | ) 49 | -------------------------------------------------------------------------------- /outlines/generate/fsm.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | import interegular 4 | 5 | from outlines.fsm.guide import RegexGuide 6 | from outlines.generate.api import ( 7 | SequenceGeneratorAdapter, 8 | VisionSequenceGeneratorAdapter, 9 | ) 10 | from outlines.models import TransformersVision 11 | from outlines.samplers import Sampler, multinomial 12 | 13 | 14 | @singledispatch 15 | def fsm( 16 | model, fsm: interegular.fsm.FSM, sampler: Sampler = multinomial() 17 | ) -> SequenceGeneratorAdapter: 18 | from outlines.processors import GuideLogitsProcessor 19 | 20 | guide = RegexGuide.from_interegular_fsm(fsm, model.tokenizer) 21 | logits_processor = GuideLogitsProcessor(tokenizer=model.tokenizer, guide=guide) 22 | return SequenceGeneratorAdapter(model, logits_processor, sampler) 23 | 24 | 25 | @fsm.register(TransformersVision) 26 | def fsm_vision(model, fsm: interegular.fsm.FSM, sampler: Sampler = multinomial()): 27 | from outlines.processors import GuideLogitsProcessor 28 | 29 | guide = RegexGuide.from_interegular_fsm(fsm, model.tokenizer) 30 | logits_processor = GuideLogitsProcessor(tokenizer=model.tokenizer, guide=guide) 31 | return VisionSequenceGeneratorAdapter(model, logits_processor, sampler) 32 | -------------------------------------------------------------------------------- /outlines/generate/regex.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | from outlines.generate.api import ( 4 | SequenceGeneratorAdapter, 5 | VisionSequenceGeneratorAdapter, 6 | ) 7 | from outlines.models import OpenAI, TransformersVision 8 | from outlines.samplers import Sampler, multinomial 9 | from outlines.types import Regex 10 | 11 | 12 | @singledispatch 13 | def regex(model, regex_str: str | Regex, sampler: Sampler = multinomial()): 14 | """Generate structured text in the language of a regular 
expression. 15 | 16 | Parameters 17 | ---------- 18 | model: 19 | An instance of `Transformer` that represents a model from the 20 | `transformers` library. 21 | regex_str: 22 | The regular expression that the output must follow. 23 | sampler: 24 | The sampling algorithm to use to generate token ids from the logits 25 | distribution. 26 | 27 | Returns 28 | ------- 29 | A `SequenceGeneratorAdapter` instance that generates text constrained by the 30 | regular expression. 31 | 32 | """ 33 | from outlines.processors import RegexLogitsProcessor 34 | 35 | if isinstance(regex_str, Regex): 36 | regex_str = regex_str.pattern 37 | 38 | logits_processor = RegexLogitsProcessor(regex_str, tokenizer=model.tokenizer) 39 | return SequenceGeneratorAdapter(model, logits_processor, sampler) 40 | 41 | 42 | @regex.register(TransformersVision) 43 | def regex_vision( 44 | model, 45 | regex_str: str | Regex, 46 | sampler: Sampler = multinomial(), 47 | ): 48 | from outlines.processors import RegexLogitsProcessor 49 | 50 | if isinstance(regex_str, Regex): 51 | regex_str = regex_str.pattern 52 | 53 | logits_processor = RegexLogitsProcessor(regex_str, tokenizer=model.tokenizer) 54 | return VisionSequenceGeneratorAdapter(model, logits_processor, sampler) 55 | 56 | 57 | @regex.register(OpenAI) 58 | def regex_openai( 59 | model: OpenAI, 60 | regex_str: str, 61 | sampler: Sampler = multinomial(), 62 | ): 63 | raise NotImplementedError( 64 | "Cannot use regex-structured generation with an OpenAI model" 65 | + "due to the limitations of the OpenAI API." 66 | ) 67 | -------------------------------------------------------------------------------- /outlines/generate/text.py: -------------------------------------------------------------------------------- 1 | from functools import singledispatch 2 | 3 | from outlines.generate.api import ( 4 | SequenceGeneratorAdapter, 5 | VisionSequenceGeneratorAdapter, 6 | ) 7 | from outlines.models import OpenAI, TransformersVision 8 | from outlines.samplers import Sampler, multinomial 9 | 10 | 11 | @singledispatch 12 | def text(model, sampler: Sampler = multinomial()) -> SequenceGeneratorAdapter: 13 | """Generate text with a `Transformer` model. 14 | 15 | Note 16 | ---- 17 | Python 3.11 allows dispatching on Union types and 18 | this should greatly simplify the code. 19 | 20 | Arguments 21 | --------- 22 | model: 23 | An instance of `Transformer` that represents a model from the 24 | `transformers` library. 25 | sampler: 26 | The sampling algorithm to use to generate token ids from the logits 27 | distribution. 28 | 29 | Returns 30 | ------- 31 | A `SequenceGeneratorAdapter` instance that generates text. 32 | 33 | """ 34 | return SequenceGeneratorAdapter(model, None, sampler) 35 | 36 | 37 | @text.register(TransformersVision) 38 | def text_vision(model, sampler: Sampler = multinomial()): 39 | return VisionSequenceGeneratorAdapter(model, None, sampler) 40 | 41 | 42 | @text.register(OpenAI) 43 | def text_openai(model: OpenAI, sampler: Sampler = multinomial()) -> OpenAI: 44 | if not isinstance(sampler, multinomial): 45 | raise NotImplementedError( 46 | r"The OpenAI API does not support any other sampling algorithm " 47 | + "than the multinomial sampler." 
48 | ) 49 | 50 | return model 51 | -------------------------------------------------------------------------------- /outlines/grammars.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | GRAMMAR_PATH = Path(__file__).parent / "grammars" 4 | 5 | 6 | def read_grammar(grammar_file_name, base_grammar_path=GRAMMAR_PATH): 7 | """Read grammar file from default grammar path""" 8 | full_path = base_grammar_path / grammar_file_name 9 | with open(full_path) as file: 10 | return file.read() 11 | 12 | 13 | arithmetic = read_grammar("arithmetic.lark") 14 | json = read_grammar("json.lark") 15 | -------------------------------------------------------------------------------- /outlines/grammars/arithmetic.lark: -------------------------------------------------------------------------------- 1 | ?start: sum 2 | 3 | ?sum: product 4 | | sum "+" product -> add 5 | | sum "-" product -> sub 6 | 7 | ?product: atom 8 | | product "*" atom -> mul 9 | | product "/" atom -> div 10 | 11 | ?atom: NUMBER -> number 12 | | "-" atom -> neg 13 | | "(" sum ")" 14 | 15 | %import common.NUMBER 16 | %import common.WS_INLINE 17 | 18 | %ignore WS_INLINE 19 | -------------------------------------------------------------------------------- /outlines/grammars/common.lark: -------------------------------------------------------------------------------- 1 | // Adapted from https://github.com/lark-parser/lark/blob/master/lark/grammars/common.lark 2 | 3 | // Lark License: 4 | // Copyright © 2017 Erez Shinan 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | // this software and associated documentation files (the "Software"), to deal in 8 | // the Software without restriction, including without limitation the rights to 9 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | // the Software, and to permit persons to whom the Software is furnished to do so, 11 | // subject to the following conditions: 12 | // 13 | // The above copyright notice and this permission notice shall be included in all 14 | // copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | // Basic terminals for common use 25 | 26 | 27 | // 28 | // Numbers 29 | // 30 | 31 | DIGIT: "0".."9" 32 | HEXDIGIT: "a".."f"|"A".."F"|DIGIT 33 | 34 | INT: DIGIT+ 35 | SIGNED_INT: ["+"|"-"] INT 36 | DECIMAL: INT "." INT? | "." INT 37 | 38 | // float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/ 39 | _EXP: ("e"|"E") SIGNED_INT 40 | FLOAT: INT _EXP | DECIMAL _EXP? 
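// e.g. DECIMAL matches "3.14" and ".5"; FLOAT additionally matches exponent forms such as "1e3" and ".5e-2"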
41 | SIGNED_FLOAT: ["+"|"-"] FLOAT 42 | 43 | NUMBER: FLOAT | INT 44 | SIGNED_NUMBER: ["+"|"-"] NUMBER 45 | 46 | UNESCAPED_STRING: /\"[^"]*\"/ 47 | 48 | // based on `outlines/fsm/json_schema.py` 49 | _NON_CONTROL_CHAR: /([^"\\\x00-\x1F\x7F-\x9F])/ 50 | _ESCAPED_CHAR: /\\/ (_NON_CONTROL_CHAR | /\\/ | /"/) 51 | ESCAPED_STRING_INNER: _NON_CONTROL_CHAR | _ESCAPED_CHAR 52 | ESCAPED_STRING: /"/ ESCAPED_STRING_INNER* /"/ 53 | 54 | 55 | 56 | // 57 | // Names (Variables) 58 | // 59 | LCASE_LETTER: "a".."z" 60 | UCASE_LETTER: "A".."Z" 61 | 62 | LETTER: UCASE_LETTER | LCASE_LETTER 63 | WORD: LETTER+ 64 | 65 | CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)* 66 | 67 | 68 | // 69 | // Whitespace 70 | // 71 | WS_INLINE: (" "|/\t/)+ 72 | WS: /[ \t\f\r\n]/+ 73 | 74 | CR : /\r/ 75 | LF : /\n/ 76 | NEWLINE: (CR? LF)+ 77 | 78 | 79 | // Comments 80 | SH_COMMENT: /#[^\n]*/ 81 | CPP_COMMENT: /\/\/[^\n]*/ 82 | C_COMMENT: "/*" /(.|\n)*?/ "*/" 83 | SQL_COMMENT: /--[^\n]*/ 84 | -------------------------------------------------------------------------------- /outlines/grammars/json.lark: -------------------------------------------------------------------------------- 1 | ?start: value 2 | 3 | ?value: object 4 | | array 5 | | ESCAPED_STRING 6 | | SIGNED_NUMBER -> number 7 | | "true" -> true 8 | | "false" -> false 9 | | "null" -> null 10 | 11 | array : "[" [value ("," value)*] "]" 12 | object : "{" [pair ("," pair)*] "}" 13 | pair : ESCAPED_STRING ":" value 14 | 15 | %import common.ESCAPED_STRING 16 | %import common.SIGNED_NUMBER 17 | %import common.WS 18 | 19 | %ignore WS 20 | -------------------------------------------------------------------------------- /outlines/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Module that contains all the models integrated in outlines. 2 | 3 | We group the models in submodules by provider instead of theme (completion, chat 4 | completion, diffusers, etc.) and use routing functions everywhere else in the 5 | codebase. 6 | 7 | """ 8 | 9 | from typing import Union 10 | 11 | from .exllamav2 import ExLlamaV2Model, exl2 12 | from .llamacpp import LlamaCpp, llamacpp 13 | from .mlxlm import MLXLM, mlxlm 14 | from .openai import OpenAI, azure_openai, openai 15 | from .transformers import Transformers, TransformerTokenizer, mamba, transformers 16 | from .transformers_vision import TransformersVision, transformers_vision 17 | from .vllm import VLLM, vllm 18 | 19 | LogitsGenerator = Union[Transformers, LlamaCpp, OpenAI, ExLlamaV2Model, MLXLM, VLLM] 20 | -------------------------------------------------------------------------------- /outlines/models/tokenizer.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Hashable, List, Protocol, Set, Tuple, Union 2 | 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | 6 | 7 | class Tokenizer(Hashable, Protocol): 8 | eos_token: str 9 | eos_token_id: int 10 | pad_token_id: int 11 | vocabulary: Dict[str, int] 12 | special_tokens: Set[str] 13 | 14 | def encode( 15 | self, prompt: Union[str, List[str]] 16 | ) -> Tuple[NDArray[np.int64], NDArray[np.int64]]: 17 | """Translate the input prompts into arrays of token ids and attention mask.""" 18 | ... 19 | 20 | def decode(self, token_ids: NDArray[np.int64]) -> List[str]: 21 | """Translate an array of token ids to a string or list of strings.""" 22 | ... 23 | 24 | def convert_token_to_string(self, token: str) -> str: 25 | """Convert a token to its equivalent string. 
26 | 27 | This is for instance useful for BPE tokenizers where whitespaces are 28 | represented by the special characted `Ġ`. This prevents matching a raw 29 | token that includes `Ġ` with a string. 30 | """ 31 | ... 32 | -------------------------------------------------------------------------------- /outlines/processors/__init__.py: -------------------------------------------------------------------------------- 1 | from .structured import ( 2 | CFGLogitsProcessor, 3 | GuideLogitsProcessor, 4 | JSONLogitsProcessor, 5 | OutlinesLogitsProcessor, 6 | RegexLogitsProcessor, 7 | ) 8 | -------------------------------------------------------------------------------- /outlines/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/outlines/py.typed -------------------------------------------------------------------------------- /outlines/serve/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/outlines/serve/__init__.py -------------------------------------------------------------------------------- /outlines/types/__init__.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from . import airports, countries, locale 4 | from outlines.types.dsl import ( 5 | Regex, 6 | json_schema, 7 | regex, 8 | either, 9 | optional, 10 | exactly, 11 | at_least, 12 | at_most, 13 | between, 14 | one_or_more, 15 | zero_or_more, 16 | ) 17 | 18 | # Python types 19 | integer = Regex(r"[+-]?(0|[1-9][0-9]*)") 20 | boolean = Regex("(True|False)") 21 | number = Regex(rf"{integer.pattern}(\.[0-9]+)?([eE][+-][0-9]+)?") 22 | date = Regex(r"(\d{4})-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])") 23 | time = Regex(r"([0-1][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])") 24 | datetime = Regex(rf"({date.pattern})(\s)({time.pattern})") 25 | 26 | # Basic regex types 27 | digit = Regex(r"\d") 28 | char = Regex(r"\w") 29 | newline = Regex(r"(\r\n|\r|\n)") # Matched new lines on Linux, Windows & MacOS 30 | whitespace = Regex(r"\s") 31 | 32 | # Document-specific types 33 | sentence = Regex(r"[A-Z].*\s*[.!?]") 34 | paragraph = Regex(rf"{sentence.pattern}(?:\s+{sentence.pattern})*\n+") 35 | 36 | 37 | # The following regex is FRC 5322 compliant and was found at: 38 | # https://emailregex.com/ 39 | email = Regex( 40 | r"""(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])""" 41 | ) 42 | 43 | # Matches any ISBN number. Note that this is not completely correct as not all 44 | # 10 or 13 digits numbers are valid ISBNs. See https://en.wikipedia.org/wiki/ISBN 45 | # Taken from O'Reilly's Regular Expression Cookbook: 46 | # https://www.oreilly.com/library/view/regular-expressions-cookbook/9781449327453/ch04s13.html 47 | # 48 | # TODO: The check digit can only be computed by calling a function to compute it dynamically 49 | isbn = Regex( 50 | r"(?:ISBN(?:-1[03])?:? 
)?(?=[0-9X]{10}$|(?=(?:[0-9]+[- ]){3})[- 0-9X]{13}$|97[89][0-9]{10}$|(?=(?:[0-9]+[- ]){4})[- 0-9]{17}$)(?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]" 51 | ) 52 | -------------------------------------------------------------------------------- /outlines/types/airports.py: -------------------------------------------------------------------------------- 1 | """Generate valid airport codes.""" 2 | 3 | from enum import Enum 4 | 5 | import airportsdata 6 | 7 | AIRPORT_IATA_LIST = [ 8 | (v["iata"], v["iata"]) for v in airportsdata.load().values() if v["iata"] 9 | ] 10 | IATA = Enum("Airport", AIRPORT_IATA_LIST) # type:ignore 11 | -------------------------------------------------------------------------------- /outlines/types/countries.py: -------------------------------------------------------------------------------- 1 | """Generate valid country codes and names.""" 2 | 3 | from enum import Enum 4 | 5 | from iso3166 import countries 6 | 7 | 8 | def get_country_flags(): 9 | """Generate Unicode flags for all ISO 3166-1 alpha-2 country codes in Alpha2 Enum.""" 10 | base = ord("🇦") 11 | return { 12 | code.name: chr(base + ord(code.name[0]) - ord("A")) 13 | + chr(base + ord(code.name[1]) - ord("A")) 14 | for code in Alpha2 15 | } 16 | 17 | 18 | ALPHA_2_CODE = [(country.alpha2, country.alpha2) for country in countries] 19 | Alpha2 = Enum("Alpha_2", ALPHA_2_CODE) # type:ignore 20 | 21 | ALPHA_3_CODE = [(country.alpha3, country.alpha3) for country in countries] 22 | Alpha3 = Enum("Alpha_3", ALPHA_3_CODE) # type:ignore 23 | 24 | NUMERIC_CODE = [(str(country.numeric), str(country.numeric)) for country in countries] 25 | Numeric = Enum("Numeric_code", NUMERIC_CODE) # type:ignore 26 | 27 | NAME = [(country.name, country.name) for country in countries] 28 | Name = Enum("Name", NAME) # type:ignore 29 | 30 | flag_mapping = get_country_flags() 31 | FLAG = [(flag, flag) for code, flag in flag_mapping.items()] 32 | Flag = Enum("Flag", FLAG) # type:ignore 33 | -------------------------------------------------------------------------------- /outlines/types/locale/__init__.py: -------------------------------------------------------------------------------- 1 | from . import us 2 | -------------------------------------------------------------------------------- /outlines/types/locale/us.py: -------------------------------------------------------------------------------- 1 | from outlines.types.dsl import Regex 2 | 3 | zip_code = Regex(r"\d{5}(?:-\d{4})?") 4 | phone_number = Regex(r"(\([0-9]{3}\) |[0-9]{3}-)[0-9]{3}-[0-9]{4}") 5 | -------------------------------------------------------------------------------- /requirements-doc.txt: -------------------------------------------------------------------------------- 1 | mkdocs 2 | mkdocs-material 3 | mkdocs-material[imaging] 4 | mkdocs-mermaid2-plugin 5 | mkdocs-section-index 6 | mkdocstrings[python] 7 | mkdocs-git-committers-plugin-2 8 | mkdocs-git-revision-date-localized-plugin 9 | mike 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | select = C,E,F,W 4 | ignore = E203,E231,E501,E741,W503,W504,C901,E731 5 | per-file-ignores = 6 | **/__init__.py:F401,F403 7 | exclude = 8 | normalai/_version.py 9 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | { pkgs ? 
import { config = { allowUnfree = true; }; } }: 2 | 3 | (pkgs.buildFHSEnv { 4 | name = "dottxt-ai"; 5 | targetPkgs = pkgs: 6 | with pkgs; [ 7 | autoconf 8 | binutils 9 | cmake 10 | cudatoolkit 11 | curl 12 | freeglut 13 | gcc13 14 | git 15 | gitRepo 16 | gnumake 17 | gnupg 18 | gperf 19 | libGL 20 | libGLU 21 | linuxPackages.nvidia_x11 22 | m4 23 | ncurses5 24 | procps 25 | python311 26 | stdenv.cc 27 | unzip 28 | util-linux 29 | uv 30 | xorg.libX11 31 | xorg.libXext 32 | xorg.libXi 33 | xorg.libXmu 34 | xorg.libXrandr 35 | xorg.libXv 36 | zlib 37 | ]; 38 | 39 | multiPkgs = pkgs: with pkgs; [ zlib ]; 40 | 41 | runScript = "bash"; 42 | 43 | profile = '' 44 | # CUDA paths 45 | export CUDA_HOME=${pkgs.cudatoolkit} 46 | export CUDA_PATH=${pkgs.cudatoolkit} 47 | 48 | # Ensure proper binary paths are included 49 | export PATH=${pkgs.gcc13}/bin:${pkgs.cudatoolkit}/bin:$PATH 50 | 51 | # Set library paths, including additional directories for CUPTI 52 | export LD_LIBRARY_PATH=${pkgs.cudatoolkit}/lib64:${pkgs.cudatoolkit}/extras/CUPTI/lib64:${pkgs.linuxPackages.nvidia_x11}/lib:$LD_LIBRARY_PATH 53 | 54 | # Add static library paths to EXTRA_LDFLAGS for the linker 55 | export EXTRA_LDFLAGS="-L${pkgs.cudatoolkit}/lib64 -L${pkgs.cudatoolkit}/extras/CUPTI/lib64 -L${pkgs.linuxPackages.nvidia_x11}/lib -L${pkgs.cudatoolkit}/libdevice $EXTRA_LDFLAGS" 56 | export EXTRA_CCFLAGS="-I${pkgs.cudatoolkit}/include $EXTRA_CCFLAGS" 57 | 58 | # Set CMake paths 59 | export CMAKE_PREFIX_PATH=${pkgs.cudatoolkit}:${pkgs.linuxPackages.nvidia_x11}:$CMAKE_PREFIX_PATH 60 | 61 | # C++ and CC flags 62 | export CXXFLAGS="--std=c++17 $EXTRA_CCFLAGS" 63 | export CC=${pkgs.gcc13}/bin/gcc 64 | export CXX=${pkgs.gcc13}/bin/g++ 65 | 66 | # NVCC flags to use the right compiler 67 | export NVCC_FLAGS="-ccbin ${pkgs.gcc13}/bin/gcc" 68 | ''; 69 | 70 | structuredAttrs__ = { 71 | stdenv = pkgs.stdenv.overrideCC pkgs.stdenv.cc pkgs.gcc13; 72 | }; 73 | }).env 74 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/tests/__init__.py -------------------------------------------------------------------------------- /tests/cfg_samples/arithmetic/lots_of_ops.arithmetic.test: -------------------------------------------------------------------------------- 1 | 5+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 2 | -------------------------------------------------------------------------------- /tests/cfg_samples/arithmetic/simple_math.arithmetic.test: -------------------------------------------------------------------------------- 1 | (1 * 2) - (0.1 * 2 * 9.42) 2 | -------------------------------------------------------------------------------- /tests/cfg_samples/json/simple_fruit.json.test: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "ID": "1", 4 | "Name": "Andrew \"The Escaper\" Lapp", 5 | "Age": "30", 6 | "FavFruit": "Banana" 7 | }, 8 | { 9 | "ID": "2", 10 | "Name": "Mohammad", 11 | "Age": "40", 12 | "FavFruit": "\"Any Fruit As Long as It's In Quotes!\"" 13 | }, 14 | { 15 | "ID": "3", 16 | "Name": "Alice", 17 | "Age": "61", 18 | "FavFruit": "Peaches, but only \n newline separated peaches" 19 | } 20 | ] 21 | -------------------------------------------------------------------------------- /tests/cfg_samples/json/simple_fruit_no_indent.json.test: 
-------------------------------------------------------------------------------- 1 | [{"ID": "1", "Name": "Andrew", "Age": "30", "FavFruit": "Banana"}, {"ID": "2", "Name": "Mohammad", "Age": "40", "FavFruit": "Apple"}, {"ID": "3", "Name": "Alice", "Age": "61", "FavFruit": "Peach"}] 2 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import pytest 4 | 5 | 6 | def pytest_collection_modifyitems(config, items): 7 | if sys.platform != "linux": 8 | if not config.option.keyword or ( 9 | config.option.keyword and "test_integration_vllm" in config.option.keyword 10 | ): 11 | print( 12 | "WARNING: test_integration_vllm tests are skipped because vLLM only supports Linux platform (including WSL)." 13 | ) 14 | skip_vllm = pytest.mark.skip(reason="vLLM models can only be run on Linux.") 15 | for item in items: 16 | if "test_integration_vllm" in item.nodeid: 17 | item.add_marker(skip_vllm) 18 | -------------------------------------------------------------------------------- /tests/fsm/test_json_schema.py: -------------------------------------------------------------------------------- 1 | import json 2 | from contextlib import nullcontext 3 | from enum import Enum 4 | from functools import partial 5 | from typing import List 6 | 7 | import pytest 8 | from outlines_core.fsm.json_schema import build_regex_from_schema 9 | from pydantic import BaseModel, constr 10 | 11 | from outlines.fsm.json_schema import get_schema_from_enum, get_schema_from_signature 12 | 13 | 14 | def test_function_basic(): 15 | def test_function(foo: str, bar: List[int]): 16 | pass 17 | 18 | result = get_schema_from_signature(test_function) 19 | assert result["type"] == "object" 20 | assert list(result["properties"].keys()) == ["foo", "bar"] 21 | assert result["properties"]["foo"]["type"] == "string" 22 | assert result["properties"]["bar"]["type"] == "array" 23 | assert result["properties"]["bar"]["items"]["type"] == "integer" 24 | 25 | 26 | def test_function_no_type(): 27 | def test_function(foo, bar: List[int]): 28 | pass 29 | 30 | with pytest.raises(ValueError): 31 | get_schema_from_signature(test_function) 32 | 33 | 34 | def test_from_pydantic(): 35 | class User(BaseModel): 36 | user_id: int 37 | name: str 38 | maxlength_name: constr(max_length=10) 39 | minlength_name: constr(min_length=10) 40 | value: float 41 | is_true: bool 42 | 43 | schema = json.dumps(User.model_json_schema()) 44 | regex_str = build_regex_from_schema(schema) 45 | assert isinstance(regex_str, str) 46 | 47 | 48 | def add(a: float, b: float) -> float: 49 | return a + b 50 | 51 | 52 | class MyEnum(Enum): 53 | add = partial(add) 54 | a = "a" 55 | b = 2 56 | 57 | 58 | # if you don't register your function as callable, you will get an empty enum 59 | class EmptyEnum(Enum): 60 | add = add 61 | 62 | 63 | @pytest.mark.parametrize( 64 | "enum,expectation", 65 | [ 66 | (MyEnum, nullcontext()), 67 | (EmptyEnum, pytest.raises(ValueError)), 68 | ], 69 | ) 70 | def test_enum_schema(enum, expectation): 71 | with expectation: 72 | schema = get_schema_from_enum(enum) 73 | regex_str = build_regex_from_schema(json.dumps(schema)) 74 | assert isinstance(regex_str, str) 75 | assert schema["title"] == enum.__name__ 76 | assert len(schema["oneOf"]) == len(enum) 77 | for elt in schema["oneOf"]: 78 | assert type(elt) in [int, float, bool, type(None), str, dict] 79 | 
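# Note: EmptyEnum raises because a plain function assigned in an Enum body becomes a
# method rather than a member, so the resulting enum has no members; wrapping the
# function in functools.partial (as MyEnum.add does) keeps it as an enum member.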
-------------------------------------------------------------------------------- /tests/fsm/test_types.py: -------------------------------------------------------------------------------- 1 | import datetime as pydatetime 2 | 3 | import pytest 4 | 5 | from outlines.fsm.types import python_types_to_regex 6 | from outlines import types 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "python_type,custom_type", 11 | [ 12 | (int, types.integer), 13 | (float, types.number), 14 | (bool, types.boolean), 15 | (pydatetime.date, types.date), 16 | (pydatetime.time, types.time), 17 | (pydatetime.datetime, types.datetime), 18 | ], 19 | ) 20 | def test_python_types(python_type, custom_type): 21 | test_regex, _ = python_types_to_regex(python_type) 22 | assert custom_type.pattern == test_regex.pattern 23 | -------------------------------------------------------------------------------- /tests/generate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dottxt-ai/outlines/2fd02438846d84fcdaf969c88a851627ecdc9c3b/tests/generate/__init__.py -------------------------------------------------------------------------------- /tests/generate/conftest.py: -------------------------------------------------------------------------------- 1 | from importlib import reload 2 | 3 | import pytest 4 | import torch 5 | 6 | 7 | def is_metal_available(): 8 | try: 9 | import mlx.core as mx 10 | import mlx_lm # noqa: F401 11 | 12 | assert mx.metal.is_available() 13 | except (ImportError, AssertionError): 14 | return False 15 | return True 16 | 17 | 18 | def pytest_collection_modifyitems(config, items): 19 | """ 20 | If mlxlm and Metal aren't available, skip mlxlm tests 21 | If CUDA isn't available, skip vllm and transformers_vision 22 | """ 23 | if not torch.cuda.is_available(): 24 | skip_marker = pytest.mark.skip( 25 | reason="Skipping test because CUDA is not available" 26 | ) 27 | for item in items: 28 | if "model_fixture" in item.fixturenames: 29 | model_param = item.callspec.params.get("model_fixture", None) 30 | if ( 31 | model_param.startswith("model_transformers_vision") 32 | or model_param.startswith("model_vllm") 33 | or model_param.startswith("model_exllamav2") 34 | ): 35 | item.add_marker(skip_marker) 36 | 37 | if not is_metal_available(): 38 | skip_marker = pytest.mark.skip( 39 | reason="Skipping test because mlx-lm or Metal are not available" 40 | ) 41 | for item in items: 42 | if "model_fixture" in item.fixturenames: 43 | model_param = item.callspec.params.get("model_fixture", None) 44 | if model_param.startswith("model_mlxlm"): 45 | item.add_marker(skip_marker) 46 | 47 | 48 | @pytest.fixture 49 | def temp_cache_dir(): 50 | import os 51 | import tempfile 52 | 53 | import outlines.caching 54 | import outlines.fsm.guide 55 | 56 | with tempfile.TemporaryDirectory() as tempdir: 57 | os.environ["OUTLINES_CACHE_DIR"] = tempdir 58 | outlines.caching.get_cache.cache_clear() 59 | reload(outlines) 60 | reload(outlines.fsm.guide) 61 | cache_status = outlines.caching._caching_enabled 62 | try: 63 | outlines.caching._caching_enabled = True 64 | yield 65 | finally: 66 | outlines.caching._caching_enabled = cache_status 67 | -------------------------------------------------------------------------------- /tests/generate/test_api.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from urllib.request import urlopen 3 | 4 | import pytest 5 | from PIL import Image # type: ignore 6 | 7 | from 
outlines.generate.api import VisionSequenceGeneratorAdapter 8 | 9 | IMG_URI = "https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg" 10 | PIL_IMG = Image.open(BytesIO(urlopen(IMG_URI).read())).convert("RGB") 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "prompts,media,type_error", 15 | [ 16 | ("single prompt", [PIL_IMG], False), 17 | (["prompt0", "prompt1"], [[PIL_IMG], [PIL_IMG]], False), 18 | ("single prompt", [PIL_IMG, PIL_IMG], False), 19 | (["prompt0", "prompt1"], [[PIL_IMG, PIL_IMG], [PIL_IMG]], False), 20 | ("single prompt", "this isn't an image, it's a string", True), 21 | ("single prompt", PIL_IMG, True), 22 | (["prompt0", "prompt1"], [PIL_IMG], True), 23 | (["prompt0", "prompt1"], [[PIL_IMG]], True), 24 | (["prompt0", "prompt1"], [[[PIL_IMG]], [[PIL_IMG]]], True), 25 | ], 26 | ) 27 | def test_vision_sequence_generator_validate_types(prompts, media, type_error): 28 | """Ensure inputs are validated correctly""" 29 | if type_error: 30 | with pytest.raises(TypeError): 31 | VisionSequenceGeneratorAdapter._validate_prompt_media_types(prompts, media) 32 | else: 33 | VisionSequenceGeneratorAdapter._validate_prompt_media_types(prompts, media) 34 | -------------------------------------------------------------------------------- /tests/generate/test_integration_transformers_vision.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from urllib.request import urlopen 3 | 4 | import pytest 5 | from PIL import Image 6 | from transformers import AutoProcessor, LlavaForConditionalGeneration 7 | 8 | import outlines 9 | from outlines.models.transformers_vision import transformers_vision 10 | 11 | IMAGE_URLS = [ 12 | "https://upload.wikimedia.org/wikipedia/commons/2/25/Siam_lilacpoint.jpg", 13 | "https://upload.wikimedia.org/wikipedia/commons/7/71/2010-kodiak-bear-1.jpg", 14 | "https://upload.wikimedia.org/wikipedia/commons/b/be/Tamias-rufus-001.jpg", 15 | ] 16 | 17 | 18 | def img_from_url(url): 19 | img_byte_stream = BytesIO(urlopen(url).read()) 20 | return Image.open(img_byte_stream).convert("RGB") 21 | 22 | 23 | @pytest.fixture(scope="session") 24 | def model(tmp_path_factory): 25 | return transformers_vision( 26 | "trl-internal-testing/tiny-LlavaForConditionalGeneration", 27 | model_class=LlavaForConditionalGeneration, 28 | device="cpu", 29 | ) 30 | 31 | 32 | @pytest.fixture(scope="session") 33 | def processor(tmp_path_factory): 34 | return AutoProcessor.from_pretrained("llava-hf/llava-interleave-qwen-0.5b-hf") 35 | 36 | 37 | def test_single_image_text_gen(model, processor): 38 | conversation = [ 39 | { 40 | "role": "user", 41 | "content": [{"type": "text", "text": "What is this?"}, {"type": "image"}], 42 | }, 43 | ] 44 | generator = outlines.generate.text(model) 45 | sequence = generator( 46 | processor.apply_chat_template(conversation), 47 | [img_from_url(IMAGE_URLS[0])], 48 | seed=10000, 49 | max_tokens=10, 50 | ) 51 | assert isinstance(sequence, str) 52 | 53 | 54 | def test_multi_image_text_gen(model, processor): 55 | """If the length of image tags and number of images we pass are > 1 and equal, 56 | we should yield a successful generation. 
57 | """ 58 | conversation = [ 59 | { 60 | "role": "user", 61 | "content": [ 62 | {"type": "text", "text": "What do all these have in common?"}, 63 | ] 64 | + [{"type": "image"} for _ in range(len(IMAGE_URLS))], 65 | }, 66 | ] 67 | generator = outlines.generate.text(model) 68 | sequence = generator( 69 | processor.apply_chat_template(conversation), 70 | [img_from_url(i) for i in IMAGE_URLS], 71 | seed=10000, 72 | max_tokens=10, 73 | ) 74 | assert isinstance(sequence, str) 75 | 76 | 77 | def test_mismatched_image_text_gen(model, processor): 78 | """If the length of image tags and number of images we pass are unequal, 79 | we should raise an error. 80 | """ 81 | conversation = [ 82 | { 83 | "role": "user", 84 | "content": [ 85 | {"type": "text", "text": "I'm passing 3 images, but only 1 image tag"}, 86 | {"type": "image"}, 87 | ], 88 | }, 89 | ] 90 | generator = outlines.generate.text(model) 91 | with pytest.raises(ValueError): 92 | _ = generator( 93 | processor.apply_chat_template(conversation), 94 | [img_from_url(i) for i in IMAGE_URLS], 95 | seed=10000, 96 | max_tokens=10, 97 | ) 98 | 99 | 100 | def test_single_image_choice(model, processor): 101 | conversation = [ 102 | { 103 | "role": "user", 104 | "content": [{"type": "text", "text": "What is this?"}, {"type": "image"}], 105 | }, 106 | ] 107 | choices = ["cat", "dog"] 108 | generator = outlines.generate.choice(model, choices) 109 | sequence = generator( 110 | processor.apply_chat_template(conversation), 111 | [img_from_url(IMAGE_URLS[0])], 112 | seed=10000, 113 | max_tokens=10, 114 | ) 115 | assert isinstance(sequence, str) 116 | assert sequence in choices 117 | -------------------------------------------------------------------------------- /tests/models/test_mlxlm.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from outlines.models.mlxlm import mlxlm 4 | from outlines.models.transformers import TransformerTokenizer 5 | 6 | try: 7 | import mlx.core as mx 8 | 9 | HAS_MLX = mx.metal.is_available() 10 | except ImportError: 11 | HAS_MLX = False 12 | 13 | 14 | TEST_MODEL = "mlx-community/SmolLM-135M-Instruct-4bit" 15 | 16 | 17 | @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") 18 | def test_mlxlm_model(): 19 | model = mlxlm(TEST_MODEL) 20 | assert hasattr(model, "model") 21 | assert hasattr(model, "tokenizer") 22 | assert isinstance(model.tokenizer, TransformerTokenizer) 23 | 24 | 25 | @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") 26 | def test_mlxlm_tokenizer(): 27 | model = mlxlm(TEST_MODEL) 28 | 29 | # Test single string encoding/decoding 30 | test_text = "Hello, world!" 
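# mlx_tokenizer.encode is assumed here to return a plain Python list of token ids;
# wrapping it in mx.array converts it to an MLX array for the assertion below.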
31 | token_ids = mx.array(model.mlx_tokenizer.encode(test_text)) 32 | assert isinstance(token_ids, mx.array) 33 | 34 | 35 | @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") 36 | def test_mlxlm_generate(): 37 | from outlines.generate.api import GenerationParameters, SamplingParameters 38 | 39 | model = mlxlm(TEST_MODEL) 40 | prompt = "Write a haiku about programming:" 41 | 42 | # Test with basic generation parameters 43 | gen_params = GenerationParameters(max_tokens=50, stop_at=None, seed=None) 44 | 45 | # Test with different sampling parameters 46 | sampling_params = SamplingParameters( 47 | sampler="multinomial", num_samples=1, top_p=0.9, top_k=None, temperature=0.7 48 | ) 49 | 50 | # Test generation 51 | output = model.generate(prompt, gen_params, None, sampling_params) 52 | assert isinstance(output, str) 53 | assert len(output) > 0 54 | 55 | 56 | @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") 57 | def test_mlxlm_stream(): 58 | from outlines.generate.api import GenerationParameters, SamplingParameters 59 | 60 | model = mlxlm(TEST_MODEL) 61 | prompt = "Count from 1 to 5:" 62 | 63 | gen_params = GenerationParameters(max_tokens=20, stop_at=None, seed=None) 64 | 65 | sampling_params = SamplingParameters( 66 | sampler="greedy", # Use greedy sampling for deterministic output 67 | num_samples=1, 68 | top_p=None, 69 | top_k=None, 70 | temperature=0.0, 71 | ) 72 | 73 | # Test streaming 74 | stream = model.stream(prompt, gen_params, None, sampling_params) 75 | tokens = list(stream) 76 | assert len(tokens) > 0 77 | assert all(isinstance(token, str) for token in tokens) 78 | 79 | # Test that concatenated streaming output matches generate output 80 | streamed_text = "".join(tokens) 81 | generated_text = model.generate(prompt, gen_params, None, sampling_params) 82 | assert streamed_text == generated_text 83 | 84 | 85 | @pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon") 86 | def test_mlxlm_errors(): 87 | model = mlxlm(TEST_MODEL) 88 | 89 | # Test batch inference (should raise NotImplementedError) 90 | with pytest.raises(NotImplementedError): 91 | from outlines.generate.api import GenerationParameters, SamplingParameters 92 | 93 | gen_params = GenerationParameters(max_tokens=10, stop_at=None, seed=None) 94 | sampling_params = SamplingParameters("multinomial", 1, None, None, 1.0) 95 | model.generate(["prompt1", "prompt2"], gen_params, None, sampling_params) 96 | 97 | # Test beam search (should raise NotImplementedError) 98 | with pytest.raises(NotImplementedError): 99 | sampling_params = SamplingParameters("beam_search", 1, None, None, 1.0) 100 | model.generate("test prompt", gen_params, None, sampling_params) 101 | -------------------------------------------------------------------------------- /tests/models/test_tokenizer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from outlines.models.tokenizer import Tokenizer 4 | 5 | 6 | def test_tokenizer(): 7 | with pytest.raises(TypeError, match="instantiate abstract"): 8 | Tokenizer() 9 | -------------------------------------------------------------------------------- /tests/processors/test_base_processor.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import jax.numpy as jnp 4 | import numpy as np 5 | import pytest 6 | import torch 7 | 8 | from outlines.processors.base_logits_processor import OutlinesLogitsProcessor 9 | 10 | arrays = { 11 | 
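# Each entry below holds the same 2x2 matrix, expressed in a different array library's native type.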
"list": [[1.0, 2.0], [3.0, 4.0]], 12 | "np": np.array([[1, 2], [3, 4]], dtype=np.float32), 13 | "jax": jnp.array([[1, 2], [3, 4]], dtype=jnp.float32), 14 | "torch": torch.tensor([[1, 2], [3, 4]], dtype=torch.float32), 15 | } 16 | 17 | try: 18 | import mlx.core as mx 19 | 20 | arrays["mlx"] = mx.array([[1, 2], [3, 4]], dtype=mx.float32) 21 | arrays["mlx_bfloat16"] = mx.array([[1, 2], [3, 4]], dtype=mx.bfloat16) 22 | except ImportError: 23 | pass 24 | 25 | try: 26 | import jax.numpy as jnp 27 | 28 | arrays["jax"] = jnp.array([[1, 2], [3, 4]], dtype=jnp.float32) 29 | except ImportError: 30 | pass 31 | 32 | 33 | # Mock implementation of the abstract class for testing 34 | class MockLogitsProcessor(OutlinesLogitsProcessor): 35 | def process_logits( 36 | self, input_ids: List[List[int]], logits: torch.Tensor 37 | ) -> torch.Tensor: 38 | # For testing purposes, let's just return logits multiplied by 2 39 | return logits * 2 40 | 41 | 42 | @pytest.fixture 43 | def processor(): 44 | """Fixture for creating an instance of the MockLogitsProcessor.""" 45 | return MockLogitsProcessor() 46 | 47 | 48 | @pytest.mark.parametrize("array_type", arrays.keys()) 49 | def test_to_torch(array_type, processor): 50 | data = arrays[array_type] 51 | torch_tensor = processor._to_torch(data) 52 | assert isinstance(torch_tensor, torch.Tensor) 53 | assert torch.allclose( 54 | torch_tensor.cpu(), torch.tensor([[1, 2], [3, 4]], dtype=torch.float32) 55 | ) 56 | 57 | 58 | @pytest.mark.parametrize("array_type", arrays.keys()) 59 | def test_from_torch(array_type, processor): 60 | torch_tensor = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32) 61 | data = processor._from_torch(torch_tensor, type(arrays[array_type])) 62 | assert isinstance(data, type(arrays[array_type])) 63 | if array_type == "mlx_bfloat16": 64 | # For bfloat16, we expect the output to be float32 due to the conversion 65 | assert data.dtype == mx.float32 66 | assert np.allclose(np.array(data), np.array([[1, 2], [3, 4]], dtype=np.float32)) 67 | else: 68 | assert np.allclose(data, arrays[array_type]) 69 | 70 | 71 | @pytest.mark.parametrize("array_type", arrays.keys()) 72 | def test_call(array_type, processor): 73 | input_ids = arrays[array_type] 74 | logits = arrays[array_type] 75 | processed_logits = processor(input_ids, logits) 76 | 77 | assert isinstance(processed_logits, type(arrays[array_type])) 78 | assert np.allclose( 79 | np.array(processed_logits), np.array([[2.0, 4.0], [6.0, 8.0]], dtype=np.float32) 80 | ) 81 | -------------------------------------------------------------------------------- /tests/test_function.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import responses 3 | from pydantic import BaseModel 4 | from requests.exceptions import HTTPError 5 | 6 | import outlines 7 | from outlines.function import Function, download_from_github, extract_function_from_file 8 | 9 | 10 | def test_function_basic(): 11 | with pytest.deprecated_call(match="The @prompt decorator"): 12 | 13 | @outlines.prompt 14 | def test_template(text: str): 15 | """{{ text }}""" 16 | 17 | class Foo(BaseModel): 18 | id: int 19 | 20 | fn = Function(test_template, Foo, "hf-internal-testing/tiny-random-GPTJForCausalLM") 21 | 22 | assert fn.generator is None 23 | 24 | result = fn("test") 25 | assert isinstance(result, BaseModel) 26 | 27 | 28 | def test_download_from_github_invalid(): 29 | with pytest.raises(ValueError, match="Please provide"): 30 | download_from_github("outlines/program") 31 | 32 | with 
pytest.raises(ValueError, match="Do not append"): 33 | download_from_github("dottxt-ai/outlines/program.py") 34 | 35 | 36 | @responses.activate 37 | def test_download_from_github_success(): 38 | responses.add( 39 | responses.GET, 40 | "https://raw.githubusercontent.com/dottxt-ai/outlines/main/program.py", 41 | body="import outlines\n", 42 | status=200, 43 | ) 44 | 45 | file = download_from_github("dottxt-ai/outlines/program") 46 | assert file == "import outlines\n" 47 | 48 | responses.add( 49 | responses.GET, 50 | "https://raw.githubusercontent.com/dottxt-ai/outlines/main/foo/bar/program.py", 51 | body="import outlines\n", 52 | status=200, 53 | ) 54 | 55 | file = download_from_github("dottxt-ai/outlines/foo/bar/program") 56 | assert file == "import outlines\n" 57 | 58 | 59 | @responses.activate 60 | def test_download_from_github_error(): 61 | responses.add( 62 | responses.GET, 63 | "https://raw.githubusercontent.com/foo/bar/main/program.py", 64 | json={"error": "not found"}, 65 | status=404, 66 | ) 67 | 68 | with pytest.raises(ValueError, match="Program could not be found at"): 69 | download_from_github("foo/bar/program") 70 | 71 | responses.add( 72 | responses.GET, 73 | "https://raw.githubusercontent.com/foo/bar/main/program.py", 74 | json={"error": "Internal Server Error"}, 75 | status=500, 76 | ) 77 | 78 | with pytest.raises(HTTPError, match="500 Server Error"): 79 | download_from_github("foo/bar/program") 80 | 81 | 82 | def test_extract_function_from_file(): 83 | content = """ 84 | import outlines 85 | from pydantic import BaseModel 86 | 87 | model = "gpt2" 88 | 89 | 90 | @outlines.prompt 91 | def prompt(): 92 | '''Hello''' 93 | 94 | 95 | class User(BaseModel): 96 | id: int 97 | name: str 98 | 99 | 100 | function = outlines.Function( 101 | prompt, 102 | User, 103 | "gpt2", 104 | ) 105 | """ 106 | 107 | with pytest.deprecated_call(match="The @prompt decorator"): 108 | fn = extract_function_from_file(content, "function") 109 | assert ( 110 | str(type(fn)) == "" 111 | ) # because imported via `exec` 112 | 113 | 114 | def test_extract_function_from_file_no_function(): 115 | content = """ 116 | import outlines 117 | from pydantic import BaseModel 118 | 119 | @outlines.prompt 120 | def prompt(): 121 | '''Hello''' 122 | 123 | 124 | class User(BaseModel): 125 | id: int 126 | name: str 127 | 128 | program = outlines.Function( 129 | prompt, 130 | User, 131 | "gpt2", 132 | ) 133 | """ 134 | 135 | with pytest.deprecated_call(match="The @prompt decorator"): 136 | with pytest.raises(AttributeError, match="Could not find"): 137 | extract_function_from_file(content, "function") 138 | -------------------------------------------------------------------------------- /tests/test_grammars.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import outlines.grammars as grammars 4 | from outlines.fsm.guide import CFGGuide 5 | 6 | 7 | @pytest.mark.parametrize("grammar", [grammars.json, grammars.arithmetic]) 8 | def test_grammar_module(grammar): 9 | class MockTokenizer: 10 | vocabulary = {"(": 1, ")": 2, "a": 3, "eos": 4} 11 | special_tokens = {"eos"} 12 | eos_token = "eos" 13 | eos_token_id = 4 14 | 15 | def convert_token_to_string(self, token): 16 | return token 17 | 18 | @property 19 | def inverse_vocabulary(self): 20 | return {v: k for k, v in self.vocabulary.items()} 21 | 22 | def decode(self, token_ids): 23 | return [self.inverse_vocabulary[t] for t in token_ids] 24 | 25 | cfg_str = """ 26 | start: s 27 | s: "(" s ")" | /a+/ 28 | """ 29 | 
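# The toy grammar above accepts strings such as "a", "aaa", "(a)" and "((aa))":
# one or more "a" characters, optionally wrapped in nested balanced parentheses.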
tokenizer = MockTokenizer() 30 | fsm = CFGGuide(cfg_str, tokenizer) 31 | assert isinstance(fsm, CFGGuide) 32 | -------------------------------------------------------------------------------- /tests/types/test_to_regex.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | from outlines.types.dsl import ( 5 | String, 6 | Regex, 7 | JsonSchema, 8 | KleeneStar, 9 | KleenePlus, 10 | QuantifyBetween, 11 | QuantifyExact, 12 | QuantifyMaximum, 13 | QuantifyMinimum, 14 | Sequence, 15 | Alternatives, 16 | Optional, 17 | Term, 18 | to_regex, 19 | ) 20 | 21 | 22 | def test_to_regex_simple(): 23 | a = String("a") 24 | assert to_regex(a) == "a" 25 | assert a.matches("a") is True 26 | 27 | a = Regex("[0-9]") 28 | assert to_regex(a) == "([0-9])" 29 | assert a.matches(0) is True 30 | assert a.matches(10) is False 31 | assert a.matches("a") is False 32 | 33 | a = JsonSchema({"type": "integer"}) 34 | assert to_regex(a) == r"((-)?(0|[1-9][0-9]*))" 35 | assert a.matches(1) is True 36 | assert a.matches("1") is True 37 | assert a.matches("a") is False 38 | 39 | a = Optional(String("a")) 40 | assert to_regex(a) == "(a)?" 41 | assert a.matches("") is True 42 | assert a.matches("a") is True 43 | 44 | a = KleeneStar(String("a")) 45 | assert to_regex(a) == "(a)*" 46 | assert a.matches("") is True 47 | assert a.matches("a") is True 48 | assert a.matches("aaaaa") is True 49 | 50 | a = KleenePlus(String("a")) 51 | assert to_regex(a) == "(a)+" 52 | assert a.matches("") is False 53 | assert a.matches("a") is True 54 | assert a.matches("aaaaa") is True 55 | 56 | a = QuantifyExact(String("a"), 2) 57 | assert to_regex(a) == "(a){2}" 58 | assert a.matches("a") is False 59 | assert a.matches("aa") is True 60 | assert a.matches("aaa") is False 61 | 62 | a = QuantifyMinimum(String("a"), 2) 63 | assert to_regex(a) == "(a){2,}" 64 | assert a.matches("a") is False 65 | assert a.matches("aa") is True 66 | assert a.matches("aaa") is True 67 | 68 | a = QuantifyMaximum(String("a"), 2) 69 | assert to_regex(a) == "(a){,2}" 70 | assert a.matches("aa") is True 71 | assert a.matches("aaa") is False 72 | 73 | a = QuantifyBetween(String("a"), 1, 2) 74 | assert to_regex(a) == "(a){1,2}" 75 | assert a.matches("") is False 76 | assert a.matches("a") is True 77 | assert a.matches("aa") is True 78 | assert a.matches("aaa") is False 79 | 80 | with pytest.raises(TypeError, match="Cannot convert"): 81 | to_regex(Term()) 82 | 83 | 84 | def test_to_regex_combinations(): 85 | a = Sequence([Regex("dog|cat"), String("fish")]) 86 | assert to_regex(a) == "(dog|cat)fish" 87 | --------------------------------------------------------------------------------