├── .github
    ├── FUNDING.yml
    ├── dependabot.yml
    └── workflows
    │   ├── clearcache.yml
    │   ├── coverage.yml
    │   ├── pages.yml
    │   ├── rustbench.yml
    │   ├── rustcheck.yml
    │   ├── rustdoc.yml
    │   ├── rustlib.yml
    │   ├── rustlints.yml
    │   └── rustmsrv.yml
├── .gitignore
├── .pre-commit-config.yaml
├── Cargo.lock
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── RELEASE-PROCESS.md
├── book
    ├── book.toml
    ├── mdbook-admonish.css
    └── src
    │   ├── SUMMARY.md
    │   ├── assets
    │       ├── calculator_example_flow.png
    │       └── calculator_example_how_evaluator_works.png
    │   ├── attributes.md
    │   ├── attributes
    │       ├── logos.md
    │       └── token_and_regex.md
    │   ├── callbacks.md
    │   ├── common-regex.md
    │   ├── context-dependent-lexing.md
    │   ├── contributing.md
    │   ├── contributing
    │       ├── fuzzing.md
    │       ├── internals.md
    │       └── setup.md
    │   ├── debugging.md
    │   ├── examples.md
    │   ├── examples
    │       ├── brainfuck.md
    │       ├── calculator.md
    │       ├── json.md
    │       ├── json_borrowed.md
    │       └── string-interpolation.md
    │   ├── extras.md
    │   ├── getting-help.md
    │   ├── getting-started.md
    │   ├── intro.md
    │   ├── token-disambiguation.md
    │   └── unsafe.md
├── examples
    ├── brainfuck.rs
    ├── calculator.rs
    ├── custom_error.rs
    ├── example.json
    ├── extras.rs
    ├── hello_world.bf
    ├── json.rs
    ├── json_borrowed.rs
    └── string-interpolation.rs
├── fuzz
    ├── Cargo.toml
    ├── in
    │   ├── literal
    │   └── regex
    └── src
    │   └── main.rs
├── logos-cli
    ├── Cargo.toml
    ├── LICENSE-APACHE
    ├── LICENSE-MIT
    ├── src
    │   └── main.rs
    └── tests
    │   ├── data
    │       ├── fmt_output.rs
    │       ├── input.rs
    │       └── output.rs
    │   └── tests.rs
├── logos-codegen
    ├── Cargo.toml
    ├── LICENSE-APACHE
    ├── LICENSE-MIT
    ├── build.rs
    ├── src
    │   ├── error.rs
    │   ├── generator
    │   │   ├── context.rs
    │   │   ├── fork.rs
    │   │   ├── leaf.rs
    │   │   ├── mod.rs
    │   │   ├── rope.rs
    │   │   └── tables.rs
    │   ├── graph
    │   │   ├── fork.rs
    │   │   ├── impls.rs
    │   │   ├── meta.rs
    │   │   ├── mod.rs
    │   │   ├── range.rs
    │   │   ├── regex.rs
    │   │   └── rope.rs
    │   ├── leaf.rs
    │   ├── lib.rs
    │   ├── macros.rs
    │   ├── mir.rs
    │   ├── parser
    │   │   ├── definition.rs
    │   │   ├── ignore_flags.rs
    │   │   ├── mod.rs
    │   │   ├── nested.rs
    │   │   ├── subpattern.rs
    │   │   └── type_params.rs
    │   └── util.rs
    └── tests
    │   ├── codegen.rs
    │   └── data
    │       ├── no_error_lut
    │           ├── input.rs
    │           └── output.rs
    │       └── simple
    │           ├── input.rs
    │           └── output.rs
├── logos-derive
    ├── Cargo.toml
    ├── LICENSE-APACHE
    ├── LICENSE-MIT
    └── src
    │   └── lib.rs
├── logos.png
├── logos.svg
├── release.toml
├── src
    ├── internal.rs
    ├── lexer.rs
    ├── lib.rs
    └── source.rs
└── tests
    ├── Cargo.toml
    ├── benches
        └── bench.rs
    ├── src
        └── lib.rs
    └── tests
        ├── advanced.rs
        ├── binary.rs
        ├── callbacks.rs
        ├── clone.rs
        ├── crate_.rs
        ├── css.rs
        ├── custom_error.rs
        ├── edgecase.rs
        ├── ignore_case.rs
        ├── lexer_modes.rs
        ├── properties.rs
        ├── simple.rs
        ├── source.rs
        ├── string.rs
        └── unicode_dot.rs


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: [maciejhirsz]
4 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 | - package-ecosystem: github-actions
 9 |       # Workflow files stored in the
10 |       # default location of `.github/workflows`
11 |   directory: /
12 |   schedule:
13 |     interval: daily
14 |   labels:
15 |   - github-actions
16 |   - dependencies
17 | 


--------------------------------------------------------------------------------
/.github/workflows/clearcache.yml:
--------------------------------------------------------------------------------
 1 | # From: https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries
 2 | name: Cleanup caches by a branch
 3 | on:
 4 |   pull_request:
 5 |     types:
 6 |     - closed
 7 | 
 8 | jobs:
 9 |   cleanup:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |     - name: Check out code
13 |       uses: actions/checkout@v4
14 | 
15 |     - name: Cleanup
16 |       run: |
17 |         gh extension install actions/gh-actions-cache
18 | 
19 |         REPO=${{ github.repository }}
20 |         BRANCH="refs/pull/${{ github.event.pull_request.number }}/merge"
21 | 
22 |         echo "Fetching list of cache key"
23 |         cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH | cut -f 1 )
24 | 
25 |         ## Setting this to not fail the workflow while deleting cache keys.
26 |         set +e
27 |         echo "Deleting caches..."
28 |         for cacheKey in $cacheKeysForPR
29 |         do
30 |             gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm
31 |         done
32 |         echo "Done"
33 |       env:
34 |         GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
35 | 


--------------------------------------------------------------------------------
/.github/workflows/coverage.yml:
--------------------------------------------------------------------------------
 1 | name: Code Coverage
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     paths:
 6 |     - '**.rs'
 7 |     - .github/workflows/coverage.yml
 8 |     - '**/Cargo.toml'
 9 |   push:
10 |     branches: [master]
11 |   workflow_dispatch:
12 | 
13 | jobs:
14 |   test:
15 |     name: Coverage
16 |     runs-on: ubuntu-latest
17 |     container:
18 |       image: xd009642/tarpaulin:develop-nightly
19 |       options: --security-opt seccomp=unconfined
20 |     steps:
21 |     - name: Checkout repository
22 |       uses: actions/checkout@v4
23 | 
24 |     - name: Install rustfmt
25 |       run: |
26 |         rustup component add rustfmt
27 |     
28 |     - name: Generate code coverage
29 |       run: |
30 |         cargo +nightly tarpaulin --verbose --features debug --workspace --timeout 120 --out Xml
31 | 
32 |     - name: Upload to codecov.io
33 |       uses: codecov/codecov-action@v5
34 |       with:
35 |         token: ${{ secrets.CODECOV_TOKEN }}
36 |         fail_ci_if_error: true
37 | 


--------------------------------------------------------------------------------
/.github/workflows/pages.yml:
--------------------------------------------------------------------------------
 1 | # Workflow for building and deploying a mdBook site to GitHub Pages
 2 | name: Book
 3 | 
 4 | on:
 5 |   # Runs on pushes targeting the default branch
 6 |   push:
 7 |     branches: [book, master]
 8 | 
 9 |   # Also runs on PR
10 |   pull_request:
11 | 
12 |   # Allows you to run this workflow manually from the Actions tab
13 |   workflow_dispatch:
14 | 
15 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
16 | permissions:
17 |   contents: read
18 |   pages: write
19 |   id-token: write
20 | 
21 | # Allow one concurrent deployment
22 | concurrency:
23 |   group: pages
24 |   cancel-in-progress: true
25 | 
26 | jobs:
27 |   # Build job
28 |   build-book:
29 |     runs-on: ubuntu-latest
30 |     steps:
31 |     - name: Checkout
32 |       uses: actions/checkout@v4
33 | 
34 |     - name: Setup mdBook
35 |       uses: peaceiris/actions-mdbook@v2
36 |       with:
37 |         mdbook-version: 0.4.28
38 | 
39 |     - name: Install mbBook-admonish
40 |       uses: taiki-e/install-action@v2
41 |       with:
42 |         tool: mdbook-admonish
43 | 
44 |     - name: Download assets
45 |       run: |
46 |         cd book/
47 |         mdbook-admonish install
48 | 
49 |     - name: Build book
50 |       run: mdbook build book
51 | 
52 |     - name: Upload artifact
53 |       uses: actions/upload-pages-artifact@v3
54 |       with:
55 |         path: ./book/book
56 | 
57 |   # Deployment job
58 |   deploy:
59 |     if: github.event_name != 'pull_request'
60 |     environment:
61 |       name: github-pages
62 |       url: ${{ steps.deployment.outputs.page_url }}
63 |     runs-on: ubuntu-latest
64 |     needs: [build-book]
65 |     steps:
66 |     - name: Deploy to GitHub Pages
67 |       id: deployment
68 |       uses: actions/deploy-pages@v4
69 | 


--------------------------------------------------------------------------------
/.github/workflows/rustbench.yml:
--------------------------------------------------------------------------------
 1 | name: Benchmark
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     paths:
 6 |     - '**.rs'
 7 |     - .github/workflows/rustbench.yml
 8 |     - '**/Cargo.toml'
 9 |   push:
10 |     branches: [master]
11 |   workflow_dispatch:
12 | 
13 | permissions:
14 |   pull-requests: write
15 | 
16 | jobs:
17 |   benchmark:
18 |     runs-on: ubuntu-latest
19 |     steps:
20 |     - uses: actions/checkout@v4
21 | 
22 |     - name: Setup rust toolchain, cache and cargo-codspeed binary
23 |       uses: moonrepo/setup-rust@v1
24 |       with:
25 |         channel: stable
26 |         cache-target: release
27 |         bins: cargo-codspeed
28 | 
29 |     - name: Build the benchmark target(s)
30 |       run: cargo codspeed build --workspace
31 | 
32 |     - name: Run the benchmarks
33 |       uses: CodSpeedHQ/action@v3
34 |       with:
35 |         run: cargo codspeed run --workspace


--------------------------------------------------------------------------------
/.github/workflows/rustcheck.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   pull_request:
 3 |     paths:
 4 |     - '**.rs'
 5 |     - '**/Cargo.toml'
 6 |   workflow_dispatch:
 7 | 
 8 | name: Cargo check
 9 | 
10 | jobs:
11 |   cargo_check:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - uses: actions/checkout@v4
15 |     - uses: taiki-e/install-action@cargo-hack
16 |     - run: cargo hack check --feature-powerset --no-dev-deps
17 | 


--------------------------------------------------------------------------------
/.github/workflows/rustdoc.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   pull_request:
 3 |     paths:
 4 |     - '**.rs'
 5 |     - '**/Cargo.toml'
 6 |   workflow_dispatch:
 7 | 
 8 | name: Rustdoc
 9 | 
10 | jobs:
11 |   publish:
12 |     name: Build docs
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |     - name: Checkout sources
16 |       uses: actions/checkout@v4
17 | 
18 |     - name: Install nightly toolchain
19 |       uses: dtolnay/rust-toolchain@nightly
20 | 
21 |     - name: Cache dependencies
22 |       uses: Swatinem/rust-cache@v2
23 | 
24 |     - name: Check rustdoc build
25 |       run: RUSTDOCFLAGS='--cfg docsrs' cargo +nightly doc --features debug -Zunstable-options -Zrustdoc-scrape-examples
26 | 


--------------------------------------------------------------------------------
/.github/workflows/rustlib.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   pull_request:
 3 |     paths:
 4 |     - '**.rs'
 5 |     - '**/Cargo.toml'
 6 |   workflow_dispatch:
 7 | 
 8 | name: Library testing
 9 | 
10 | jobs:
11 |   rustdoc:
12 |     name: Rustdoc
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |     - name: Checkout sources
16 |       uses: actions/checkout@v4
17 | 
18 |     - name: Install nightly toolchain
19 |       uses: dtolnay/rust-toolchain@nightly
20 | 
21 |     - name: Cache dependencies
22 |       uses: Swatinem/rust-cache@v2
23 | 
24 |     - name: Check rustdoc build
25 |       run: RUSTDOCFLAGS='--cfg docsrs' cargo +nightly doc --features debug -Zunstable-options -Zrustdoc-scrape-examples
26 | 
27 |   tests:
28 |     name: Tests
29 |     strategy:
30 |       matrix:
31 |         rust:
32 |         - 1.74.0 # current MSRV
33 |         - 1.82.0 # precise capturing
34 |         - stable
35 |         - beta
36 |         - nightly
37 |         os:
38 |         - macos-latest
39 |         - ubuntu-latest
40 |         - windows-latest
41 |         features:
42 |         - ""  # default features
43 |         - "--features forbid_unsafe"
44 | 
45 |     runs-on: ${{ matrix.os }}
46 |     steps:
47 |     - name: Checkout sources
48 |       uses: actions/checkout@v4
49 | 
50 |     - name: Install stable toolchain
51 |       uses: dtolnay/rust-toolchain@stable
52 |       with:
53 |         toolchain: ${{ matrix.rust }}
54 |         components: rustfmt
55 | 
56 |     - name: Cache dependencies
57 |       uses: Swatinem/rust-cache@v2
58 | 
59 |     - name: Check that tests run
60 |       run: cargo test --workspace --verbose ${{ matrix.features }}
61 | 


--------------------------------------------------------------------------------
/.github/workflows/rustlints.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   pull_request:
 3 |     paths:
 4 |     - '**.rs'
 5 |     - '**/Cargo.toml'
 6 |   workflow_dispatch:
 7 | 
 8 | name: Rust lints
 9 | 
10 | jobs:
11 |   clippy:
12 |     name: Clippy
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |     - name: Checkout sources
16 |       uses: actions/checkout@v4
17 | 
18 |     - name: Install stable toolchain
19 |       uses: dtolnay/rust-toolchain@stable
20 |       with:
21 |         components: clippy
22 | 
23 |     - name: Check clippy
24 |       run: cargo clippy --features debug -- -D warnings
25 | 
26 |   rustfmt:
27 |     name: Rustfmt
28 |     runs-on: ubuntu-latest
29 |     steps:
30 |     - name: Checkout sources
31 |       uses: actions/checkout@v4
32 | 
33 |     - name: Install stable toolchain
34 |       uses: dtolnay/rust-toolchain@stable
35 |       with:
36 |         components: rustfmt
37 | 
38 |     - name: Check format
39 |       run: cargo fmt --check
40 | 


--------------------------------------------------------------------------------
/.github/workflows/rustmsrv.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   pull_request:
 3 |     paths:
 4 |     - '**.rs'
 5 |     - '**/Cargo.toml'
 6 |   workflow_dispatch:
 7 | 
 8 | name: MSRV check
 9 | 
10 | jobs:
11 |   msrv_check:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - uses: actions/checkout@v4
15 | 
16 |     - name: Install stable toolchain
17 |       uses: dtolnay/rust-toolchain@stable
18 | 
19 |     - name: Install Cargo MSRV
20 |       uses: baptiste0928/cargo-install@v3
21 |       with:
22 |         crate: cargo-msrv
23 |         args: --no-default-features
24 |         version: ^0.18.1
25 | 
26 |     - name: Check MSRV
27 |       run: cargo msrv verify -- cargo check --workspace --features debug
28 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | /book/book
3 | **/*.rs.bk
4 | 
5 | # ignore output fuzzing
6 | out
7 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |   rev: v5.0.0
 4 |   hooks:
 5 |   - id: check-yaml
 6 |   - id: check-toml
 7 |   - id: end-of-file-fixer
 8 |   - id: trailing-whitespace
 9 | - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
10 |   rev: v2.14.0
11 |   hooks:
12 |   - id: pretty-format-yaml
13 |     args: [--autofix]
14 |   - id: pretty-format-toml
15 |     exclude: Cargo.lock
16 |     args: [--autofix, --trailing-commas]
17 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
  1 | [workspace]
  2 | members = ["logos-cli", "logos-codegen", "logos-derive", "tests"]
  3 | exclude = ["fuzz"]
  4 | resolver = "2"
  5 | 
  6 | [workspace.package]
  7 | authors = [
  8 |     "Maciej Hirsz <hello@maciej.codes>",
  9 |     "Jérome Eertmans (maintainer) <jeertmans@icloud.com>",
 10 | ]
 11 | categories = ["parsing", "text-processing"]
 12 | description = "Create ridiculously fast Lexers"
 13 | edition = "2021"
 14 | homepage = "https://logos.maciej.codes/"
 15 | keywords = ["lexer", "lexical", "tokenizer", "parser", "no_std"]
 16 | license = "MIT OR Apache-2.0"
 17 | readme = "README.md"
 18 | repository = "https://github.com/maciejhirsz/logos"
 19 | rust-version = "1.74.0"
 20 | version = "0.15.0"
 21 | 
 22 | [package]
 23 | name = "logos"
 24 | authors.workspace = true
 25 | categories.workspace = true
 26 | description.workspace = true
 27 | edition.workspace = true
 28 | homepage.workspace = true
 29 | keywords.workspace = true
 30 | license.workspace = true
 31 | readme.workspace = true
 32 | repository.workspace = true
 33 | rust-version.workspace = true
 34 | version.workspace = true
 35 | 
 36 | [package.metadata.release]
 37 | pre-release-replacements = [
 38 |   {file="book/src/getting-started.md", search="logos = \"[0-9]+\\.[0-9]+\\.[0-9]+\"", replace="logos = \"{{version}}\"", exactly=1},
 39 | ]
 40 | shared-version = true
 41 | 
 42 | [lib]
 43 | bench = false
 44 | 
 45 | [features]
 46 | # Enables debug messages
 47 | debug = ["logos-derive?/debug"]
 48 | default = ["export_derive", "std"]
 49 | # Re-exports the `Logos` derive macro, so that end user only needs to
 50 | # import this crate and `use logos::Logos` to get both the trait and
 51 | # derive proc macro.
 52 | export_derive = ["logos-derive"]
 53 | # Should the crate use the standard library?
 54 | std = []
 55 | # Use safe alternatives for unsafe code (may impact performance)?
 56 | forbid_unsafe = ["logos-derive?/forbid_unsafe"]
 57 | 
 58 | [package.metadata.docs.rs]
 59 | features = ["debug"]
 60 | cargo-args = ["-Zunstable-options", "-Zrustdoc-scrape-examples"]
 61 | rustdoc-args = ["--cfg", "docsrs"]
 62 | 
 63 | [profile]
 64 | bench = {lto = true}
 65 | release = {lto = true}
 66 | 
 67 | [dependencies]
 68 | logos-derive = {version = "0.15.0", path = "./logos-derive", optional = true}
 69 | 
 70 | [dev-dependencies]
 71 | ariadne = {version = "0.4", features = ["auto-color"]}
 72 | chumsky = {version = "0.9.3" }
 73 | 
 74 | [[example]]
 75 | doc-scrape-examples = true  # Only needed once, because requires dev-dependencies
 76 | name = "brainfuck"
 77 | path = "examples/brainfuck.rs"
 78 | 
 79 | [[example]]
 80 | name = "calculator"
 81 | path = "examples/calculator.rs"
 82 | 
 83 | [[example]]
 84 | name = "string-interpolation"
 85 | path = "examples/string-interpolation.rs"
 86 | 
 87 | [[example]]
 88 | name = "custom_error"
 89 | path = "examples/custom_error.rs"
 90 | 
 91 | [[example]]
 92 | name = "extras"
 93 | path = "examples/extras.rs"
 94 | 
 95 | [[example]]
 96 | name = "json"
 97 | path = "examples/json.rs"
 98 | 
 99 | [[example]]
100 | name = "json-borrowed"
101 | path = "examples/json_borrowed.rs"
102 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2018 Maciej Hirsz <maciej.hirsz@gmail.com>
 2 | 
 3 | The MIT License (MIT)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
  2 | 
  3 | # Logos
  4 | 
  5 | [![Book](https://github.com/maciejhirsz/logos/actions/workflows/pages.yml/badge.svg?branch=master)](https://logos.maciej.codes/)
  6 | [![Crates.io version shield](https://img.shields.io/crates/v/logos.svg)](https://crates.io/crates/logos)
  7 | [![Docs](https://docs.rs/logos/badge.svg)](https://docs.rs/logos)
  8 | [![Crates.io license shield](https://img.shields.io/crates/l/logos.svg)](https://crates.io/crates/logos)
  9 | [![Code coverage](https://codecov.io/gh/maciejhirsz/logos/branch/master/graph/badge.svg)](https://codecov.io/gh/maciejhirsz/logos)
 10 | 
 11 | _Create ridiculously fast Lexers._
 12 | 
 13 | **Logos** has two goals:
 14 | 
 15 | + To make it easy to create a Lexer, so you can focus on more complex problems.
 16 | + To make the generated Lexer faster than anything you'd write by hand.
 17 | 
 18 | To achieve those, **Logos**:
 19 | 
 20 | + Combines all token definitions into a single [deterministic state machine](https://en.wikipedia.org/wiki/Deterministic_finite_automaton).
 21 | + Optimizes branches into [lookup tables](https://en.wikipedia.org/wiki/Lookup_table) or [jump tables](https://en.wikipedia.org/wiki/Branch_table).
 22 | + Prevents [backtracking](https://en.wikipedia.org/wiki/ReDoS) inside token definitions.
 23 | + [Unwinds loops](https://en.wikipedia.org/wiki/Loop_unrolling), and batches reads to minimize bounds checking.
 24 | + Does all of that heavy lifting at compile time.
 25 | 
 26 | ## Example
 27 | 
 28 | ```rust
 29 | use logos::Logos;
 30 | 
 31 | #[derive(Logos, Debug, PartialEq)]
 32 | #[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens
 33 | enum Token {
 34 |     // Tokens can be literal strings, of any length.
 35 |     #[token("fast")]
 36 |     Fast,
 37 | 
 38 |     #[token(".")]
 39 |     Period,
 40 | 
 41 |     // Or regular expressions.
 42 |     #[regex("[a-zA-Z]+")]
 43 |     Text,
 44 | }
 45 | 
 46 | fn main() {
 47 |     let mut lex = Token::lexer("Create ridiculously fast Lexers.");
 48 | 
 49 |     assert_eq!(lex.next(), Some(Ok(Token::Text)));
 50 |     assert_eq!(lex.span(), 0..6);
 51 |     assert_eq!(lex.slice(), "Create");
 52 | 
 53 |     assert_eq!(lex.next(), Some(Ok(Token::Text)));
 54 |     assert_eq!(lex.span(), 7..19);
 55 |     assert_eq!(lex.slice(), "ridiculously");
 56 | 
 57 |     assert_eq!(lex.next(), Some(Ok(Token::Fast)));
 58 |     assert_eq!(lex.span(), 20..24);
 59 |     assert_eq!(lex.slice(), "fast");
 60 | 
 61 |     assert_eq!(lex.next(), Some(Ok(Token::Text)));
 62 |     assert_eq!(lex.slice(), "Lexers");
 63 |     assert_eq!(lex.span(), 25..31);
 64 | 
 65 |     assert_eq!(lex.next(), Some(Ok(Token::Period)));
 66 |     assert_eq!(lex.span(), 31..32);
 67 |     assert_eq!(lex.slice(), ".");
 68 | 
 69 |     assert_eq!(lex.next(), None);
 70 | }
 71 | ```
 72 | 
 73 | For more examples and documentation, please refer to the
 74 | [Logos handbook](https://maciejhirsz.github.io/logos/) or the
 75 | [crate documentation](https://docs.rs/logos/latest/logos/).
 76 | 
 77 | ## How fast?
 78 | 
 79 | Ridiculously fast!
 80 | 
 81 | ```norust
 82 | test identifiers                       ... bench:         647 ns/iter (+/- 27) = 1204 MB/s
 83 | test keywords_operators_and_punctators ... bench:       2,054 ns/iter (+/- 78) = 1037 MB/s
 84 | test strings                           ... bench:         553 ns/iter (+/- 34) = 1575 MB/s
 85 | ```
 86 | 
 87 | ## Acknowledgements
 88 | 
 89 | + [Pedrors](https://pedrors.pt/) for the **Logos** logo.
 90 | 
 91 | ## Thank you
 92 | 
 93 | **Logos** is very much a labor of love. If you find it useful, consider
 94 | [getting me some coffee](https://github.com/sponsors/maciejhirsz). ☕
 95 | 
 96 | If you'd like to contribute to Logos, then consider reading the
 97 | [Contributing guide](https://maciejhirsz.github.io/logos/contributing).
 98 | 
 99 | ## Contributing
100 | 
101 | **Logos** welcome any kind of contribution: bug reports, suggestions,
102 | or new features!
103 | 
104 | Please use the
105 | [issues](https://github.com/maciejhirsz/logos/issues) or
106 | [pull requests](https://github.com/maciejhirsz/logos/pulls) tabs,
107 | when appropriate.
108 | 
109 | To release a new version, follow the [RELEASE-PROCESS](RELEASE-PROCESS.md)
110 | 
111 | ## License
112 | 
113 | This code is distributed under the terms of both the MIT license
114 | and the Apache License (Version 2.0), choose whatever works for you.
115 | 
116 | See [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT) for details.
117 | 


--------------------------------------------------------------------------------
/RELEASE-PROCESS.md:
--------------------------------------------------------------------------------
 1 | # Release process
 2 | 
 3 | First, make sure you are logged-in https://crates.io with: `cargo login`.
 4 | If you don't have write access to **Logos**' crates, you can still
 5 | perform steps 1-4, and ask a maintainer with accesses to perform step 5.
 6 | 
 7 | This project uses `cargo-release` to publish all packages with more ease.
 8 | Note that, by default, every command runs in *dry mode*, and you need to append `--execute`
 9 | to actually perform the action.
10 | 
11 | Here are the following steps to release a new version:
12 | 
13 | 1. create a branch `release-x.y.z` from the `master` branch;
14 | 2. run and commit `cargo release version --workspace <LEVEL>`;
15 | 3. run and commit `cargo release replace --workspace`;
16 | 4. push your branch and create a pull request;
17 | 5. and, once your branch was merged to `master`, run the following:
18 |    ```bash
19 |    cargo release publish --package logos-codegen
20 |    cargo release publish --package logos-derive
21 |    cargo release publish --package logos-cli
22 |    cargo release publish --package logos
23 |    ```
24 | 
25 | And voilà!
26 | 


--------------------------------------------------------------------------------
/book/book.toml:
--------------------------------------------------------------------------------
 1 | [book]
 2 | authors = ["Maciej Hirsz <hello@maciej.codes>", "Jérome Eertmans (maintainer) <jeertmans@icloud.com>"]
 3 | language = "en"
 4 | multilingual = false
 5 | src = "src"
 6 | title = "Logos Handbook"
 7 | 
 8 | [preprocessor.admonish]
 9 | command = "mdbook-admonish"
10 | assets_version = "3.0.2" # do not edit: managed by `mdbook-admonish install`
11 | 
12 | [output]
13 | 
14 | [output.html]
15 | additional-css = ["./mdbook-admonish.css"]
16 | 


--------------------------------------------------------------------------------
/book/src/SUMMARY.md:
--------------------------------------------------------------------------------
 1 | # Summary
 2 | 
 3 | + [Intro](./intro.md)
 4 | + [Getting Started](./getting-started.md)
 5 | + [Help](./getting-help.md)
 6 | + [Attributes](./attributes.md)
 7 |   + [`#[logos]`](./attributes/logos.md)
 8 |   + [`#[token]` and `#[regex]`](./attributes/token_and_regex.md)
 9 | + [Token disambiguation](./token-disambiguation.md)
10 | + [Using `Extras`](./extras.md)
11 | + [Using callbacks](./callbacks.md)
12 | + [Context-dependent lexing](./context-dependent-lexing.md)
13 | + [Common regular expressions](./common-regex.md)
14 | + [Debugging](./debugging.md)
15 | + [Unsafe Code](./unsafe.md)
16 | + [Examples](./examples.md)
17 |   + [Brainfuck interpreter](./examples/brainfuck.md)
18 |   + [Simple calculator](./examples/calculator.md)
19 |   + [JSON parser](./examples/json.md)
20 |   + [JSON-borrowed parser](./examples/json_borrowed.md)
21 |   + [String interpolation](./examples/string-interpolation.md)
22 | + [Contributing](./contributing.md)
23 |   + [Setup](./contributing/setup.md)
24 |   + [Internals](./contributing/internals.md)
25 |   + [Fuzzing](./contributing/fuzzing.md)
26 | 


--------------------------------------------------------------------------------
/book/src/assets/calculator_example_flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejhirsz/logos/96765c0be188f3e8005b48db82bf1e904d2e6650/book/src/assets/calculator_example_flow.png


--------------------------------------------------------------------------------
/book/src/assets/calculator_example_how_evaluator_works.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejhirsz/logos/96765c0be188f3e8005b48db82bf1e904d2e6650/book/src/assets/calculator_example_how_evaluator_works.png


--------------------------------------------------------------------------------
/book/src/attributes.md:
--------------------------------------------------------------------------------
 1 | # Attributes
 2 | 
 3 | The `#[derive(Logos)]` procedural macro recognizes three different attribute
 4 | names.
 5 | 
 6 | + [`#[logos]`](./attributes/logos.md) is the main attribute which can be
 7 |   attached to the `enum` of your token definition. It allows you to define the
 8 |   `Extras` associated type in order to put custom state into the `Lexer`, or
 9 |   declare concrete types for generic type parameters, if your `enum` uses such.
10 |   It is strictly optional. It also allows to define parts that must be skipped
11 |   by the lexer, the error type, or regex subpatterns.
12 | + And most importantly the
13 |   [`#[token]` and `#[regex]`](./attributes/token_and_regex.md)
14 |   attributes. Those allow you to define patterns to match against the input,
15 |   either plain text strings with `#[token]`, or using regular expression
16 |   syntax with `#[regex]`. Aside from that difference, they are equivalent,
17 |   and any extra arguments you can pass to one, you can pass to the other.
18 | 


--------------------------------------------------------------------------------
/book/src/attributes/logos.md:
--------------------------------------------------------------------------------
  1 | # `#[logos]`
  2 | 
  3 | As previously said, the `#[logos]` attribute can be attached to the `enum`
  4 | of your token definition to customize your lexer. Note that they all are
  5 | **optional**.
  6 | 
  7 | The syntax is as follows:
  8 | 
  9 | ```rust,no_run,no_playground
 10 | #[derive(Logos)]
 11 | #[logos(skip "regex literal")]
 12 | #[logos(extras = ExtrasType)]
 13 | #[logos(error = ErrorType)]
 14 | #[logos(crate = path::to::logos)]
 15 | #[logos(source = SourceType)]
 16 | #[logos(subpattern subpattern_name = "regex literal")]
 17 | enum Token {
 18 |     /* ... */
 19 | }
 20 | ```
 21 | 
 22 | where `"regex literal"` can be any regex supported by
 23 | [`#[regex]`](../common-regex.md), and `ExtrasType` can be of any type!
 24 | 
 25 | An example usage of `skip` is provided in the [JSON parser example](../examples/json.md).
 26 | 
 27 | For more details about extras, read the [eponym section](../extras.md).
 28 | 
 29 | ## Custom error type
 30 | 
 31 | By default, **Logos** uses `()` as the error type, which means that it
 32 | doesn't store any information about the error.
 33 | This can be changed by using `#[logos(error = ErrorType)]` attribute on the enum.
 34 | The type `ErrorType` can be any type that implements `Clone`, `PartialEq`,
 35 | `Default` and `From<E>` for each callback's error type.
 36 | 
 37 | `ErrorType` must implement the `Default` trait because invalid tokens, i.e.,
 38 | literals that do not match any variant, will produce `Err(ErrorType::default())`.
 39 | 
 40 | Here is an example using a custom error type:
 41 | 
 42 | ```rust,no_run,noplayground
 43 | {{#include ../../../examples/custom_error.rs:all}}
 44 | ```
 45 | 
 46 | You can add error variants to `LexingError`,
 47 | and implement `From<E>` for each error type `E` that could
 48 | be returned by a callback. See [callbacks](../callbacks.md).
 49 | 
 50 | ## Specifying path to logos
 51 | 
 52 | You can force the derive macro to use a different path to `Logos`'s crate
 53 | with `#[logos(crate = path::to::logos)]`.
 54 | 
 55 | ## Custom source type
 56 | 
 57 | By default, **Logos**'s lexer will accept `&str` as input, unless any of the
 58 | pattern literals match a non utf-8 bytes sequence. In this case, it will fall
 59 | back to `&[u8]`. You can override this behavior by forcing one of the two
 60 | source types. You can also specify any custom type that implements
 61 | [`Source`](https://docs.rs/logos/latest/logos/source/trait.Source.html).
 62 | 
 63 | ## Subpatterns
 64 | 
 65 | We can use subpatterns to reuse regular expressions in our tokens or other subpatterns.
 66 | 
 67 | The syntax tu use a previously defined subpattern, like `#[logos(subpattern subpattern_name = "regex literal")]`,
 68 | in a new regular expression is `"(?&subpattern_name)"`.
 69 | 
 70 | For example:
 71 | 
 72 | ```rust,no_run,noplayground
 73 | use logos::Logos;
 74 | 
 75 | #[derive(Logos, Debug, PartialEq)]
 76 | #[logos(skip r"\s+")]
 77 | #[logos(subpattern alpha = r"[a-zA-Z]")]
 78 | #[logos(subpattern digit = r"[0-9]")]
 79 | #[logos(subpattern alphanum = r"(?&alpha)|(?&digit)")]
 80 | enum Token {
 81 |     #[regex("(?&alpha)+")]
 82 |     Word,
 83 |     #[regex("(?&digit)+")]
 84 |     Number,
 85 |     #[regex("(?&alphanum){2}")]
 86 |     TwoAlphanum,
 87 |     #[regex("(?&alphanum){3}")]
 88 |     ThreeAlphanum,
 89 | }
 90 | 
 91 | fn main() {
 92 |     let mut lex = Token::lexer("Word 1234 ab3 12");
 93 | 
 94 |     assert_eq!(lex.next(), Some(Ok(Token::Word)));
 95 |     assert_eq!(lex.slice(), "Word");
 96 | 
 97 |     assert_eq!(lex.next(), Some(Ok(Token::Number)));
 98 |     assert_eq!(lex.slice(), "1234");
 99 | 
100 |     assert_eq!(lex.next(), Some(Ok(Token::ThreeAlphanum)));
101 |     assert_eq!(lex.slice(), "ab3");
102 | 
103 |     assert_eq!(lex.next(), Some(Ok(Token::TwoAlphanum)));
104 |     assert_eq!(lex.slice(), "12");
105 | 
106 |     assert_eq!(lex.next(), None);
107 | }
108 | ```
109 | 
110 | (Note that the above supatterns are redundant as the same can be achieved with [existing character classes](https://docs.rs/regex/latest/regex/#ascii-character-classes))
111 | 


--------------------------------------------------------------------------------
/book/src/attributes/token_and_regex.md:
--------------------------------------------------------------------------------
 1 | # `#[token]` and `#[regex]`
 2 | 
 3 | For each variant your declare in your `enum` that uses the `Logos` derive macro,
 4 | you can specify one or more string literal or regex it can match.
 5 | 
 6 | The usage syntax is a follows:
 7 | 
 8 | ```rust,no_run,no_playground
 9 | #[derive(Logos)]
10 | enum Token {
11 |     #[token(literal [, callback, priority = <integer>, ignore(<flag>, ...)]]
12 |     #[regex(literal [, callback, priority = <integer>, ignore(<flag>, ...)]]
13 |     SomeVariant,
14 | }
15 | ```
16 | 
17 | where `literal` can be any `&str` or `&[u8]` string literal,
18 | `callback` can either be a closure, or a literal path to a function
19 | (see [Using callbacks section](../callbacks.md)),
20 | `priority` can be any positive integer
21 | (see [Token disambiguation section](../token-disambiguation.md)),
22 | and `flag` can by of: `case`, `ascii_case`. Only `literal` is **required**,
23 | others are optional.
24 | 
25 | You can stack any number of `#[token]` and or `#[regex]` attributes on top of
26 | the same variant.
27 | 
28 | ```admonish info
29 | For a list of supported `regex` literals, read the
30 | [Common regular expressions section](../common-regex.md).
31 | ```
32 | 


--------------------------------------------------------------------------------
/book/src/callbacks.md:
--------------------------------------------------------------------------------
 1 | # Using callbacks
 2 | 
 3 | **Logos** can also call arbitrary functions whenever a pattern is matched,
 4 | which can be used to put data into a variant:
 5 | 
 6 | ```rust,no_run,no_playground
 7 | use logos::{Logos, Lexer};
 8 | 
 9 | // Note: callbacks can return `Option` or `Result`
10 | fn kilo(lex: &mut Lexer<Token>) -> Option<u64> {
11 |     let slice = lex.slice();
12 |     let n: u64 = slice[..slice.len() - 1].parse().ok()?; // skip 'k'
13 |     Some(n * 1_000)
14 | }
15 | 
16 | fn mega(lex: &mut Lexer<Token>) -> Option<u64> {
17 |     let slice = lex.slice();
18 |     let n: u64 = slice[..slice.len() - 1].parse().ok()?; // skip 'm'
19 |     Some(n * 1_000_000)
20 | }
21 | 
22 | #[derive(Logos, Debug, PartialEq)]
23 | #[logos(skip r"[ \t\n\f]+")]
24 | enum Token {
25 |     // Callbacks can use closure syntax, or refer
26 |     // to a function defined elsewhere.
27 |     //
28 |     // Each pattern can have it's own callback.
29 |     #[regex("[0-9]+", |lex| lex.slice().parse().ok())]
30 |     #[regex("[0-9]+k", kilo)]
31 |     #[regex("[0-9]+m", mega)]
32 |     Number(u64),
33 | }
34 | 
35 | fn main() {
36 |     let mut lex = Token::lexer("5 42k 75m");
37 | 
38 |     assert_eq!(lex.next(), Some(Ok(Token::Number(5))));
39 |     assert_eq!(lex.slice(), "5");
40 | 
41 |     assert_eq!(lex.next(), Some(Ok(Token::Number(42_000))));
42 |     assert_eq!(lex.slice(), "42k");
43 | 
44 |     assert_eq!(lex.next(), Some(Ok(Token::Number(75_000_000))));
45 |     assert_eq!(lex.slice(), "75m");
46 | 
47 |     assert_eq!(lex.next(), None);
48 | }
49 | ```
50 | 
51 | Logos can handle callbacks with following return types:
52 | 
53 | | Return type                                                                       | Produces                                                                                            |
54 | | --------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------- |
55 | | `()`                                                                              | `Ok(Token::Unit)`                                                                                   |
56 | | `bool`                                                                            | `Ok(Token::Unit)` **or** `Err(<Token as Logos>::Error::default())`                                  |
57 | | `Result<(), E>`                                                                   | `Ok(Token::Unit)` **or** `Err(<Token as Logos>::Error::from(err))`                                  |
58 | | `T`                                                                               | `Ok(Token::Value(T))`                                                                               |
59 | | `Option<T>`                                                                       | `Ok(Token::Value(T))` **or** `Err(<Token as Logos>::Error::default())`                              |
60 | | `Result<T, E>`                                                                    | `Ok(Token::Value(T))` **or** `Err(<Token as Logos>::Error::from(err))`                              |
61 | | [`Skip`](https://docs.rs/logos/latest/logos/struct.Skip.html)                     | _skips matched input_                                                                               |
62 | | `Result<Skip, E>`                                                                 | _skips matched input_ **or** `Err(<Token as Logos>::Error::from(err))`                              |
63 | | [`Filter<T>`](https://docs.rs/logos/latest/logos/enum.Filter.html)                | `Ok(Token::Value(T))` **or** _skips matched input_                                                  |
64 | | [`FilterResult<T, E>`](https://docs.rs/logos/latest/logos/enum.FilterResult.html) | `Ok(Token::Value(T))` **or** `Err(<Token as Logos>::Error::from(err))` **or** _skips matched input_ |
65 | 
66 | Callbacks can also be used to perform more specialized lexing in places
67 | where regular expressions are too limiting. For specifics look at
68 | [`Lexer::remainder`](https://docs.rs/logos/latest/logos/struct.Lexer.html#method.remainder) and
69 | [`Lexer::bump`](https://docs.rs/logos/latest/logos/struct.Lexer.html#method.bump).
70 | 


--------------------------------------------------------------------------------
/book/src/common-regex.md:
--------------------------------------------------------------------------------
 1 | # Common regular expressions
 2 | 
 3 | Maybe the most important feature of **Logos** is its ability to accept
 4 | regex patterns in your tokens' definition.
 5 | 
 6 | [Regular expressions](https://en.wikipedia.org/wiki/Regular_expression),
 7 | or regexes for short, are sequences of characters (or bytes) that define a match
 8 | pattern. When constructing lexers, this is especially useful to define tokens
 9 | that should match a set of *similar* literals. E.g., a sequence of
10 | 3 ASCII uppercase letters and 3 digits could define a license plate,
11 | and could be matched with the following regex: `"[A-Z]{3}[0-9]{3}"`.
12 | 
13 | For more details about regexes in Rust, refer to the
14 | [regex](https://crates.io/crates/regex) crate.
15 | 
16 | ## Valid regexes that are not supported
17 | 
18 | Because **Logos** aims at generating high-performance code, it never allows to
19 | do backtracking. This means that anytime a byte is read from the input source,
20 | it will never be read again. This implementation choice comes at a cost: not
21 | all valid regexes are supported by **Logos**[^1].
22 | 
23 | For reference, **Logos** parses regexes using `regex-syntax = 0.8.2`, and
24 | transforms its high-level intermediate representation (HIR) into some
25 | medium intermediate representation (MIR). From HIR, MIR does not support
26 | the following
27 | [`HirKind`](https://docs.rs/regex-syntax/0.8.2/regex_syntax/hir/enum.HirKind.html)s:
28 | 
29 | + Non-greedy repetitions, i.e., matching as little as possible as given pattern.
30 | + `".*"` and `".+"` repetition patterns, because they will potentially consume
31 |   all the input source, breaking the non-backtracking rule.
32 |   For solutions, see footnote[^1] or read the error message.
33 | + Word boundaries, i.e., r`"\b"`.
34 | + Anchors, because input source does not treat lines separately.
35 | 
36 | Additionally, note that capture groups will silently be *ungrouped*,
37 | because **Logos** does not support capturing groups, but the main slice
38 | (`lex.slice()`).
39 | 
40 | [^1]: Most of time, however, it is possible to circumvent this issue by
41 | rewriting your regex another way, or by using callbacks.
42 | E.g., see
43 | [#302](https://github.com/maciejhirsz/logos/issues/302#issuecomment-1521342541).
44 | 
45 | ## Other issues
46 | 
47 | **Logos**' support for regexes is not yet complete, and errors can still exist.
48 | Some are found at compile time, and others will create wrong matches or panic.
49 | 
50 | If you ever feel like your patterns do not match the expected source slices,
51 | please check the
52 | [GitHub issues](https://github.com/maciejhirsz/logos/issues?q=is%3Aissue).
53 | If no issue covers your problem, we encourage
54 | you to create a
55 | [new issue](https://github.com/maciejhirsz/logos/issues/new),
56 | and document it as best as you can so that the issue
57 | can be reproduced locally.
58 | 


--------------------------------------------------------------------------------
/book/src/context-dependent-lexing.md:
--------------------------------------------------------------------------------
 1 | # Context-dependent lexing
 2 | 
 3 | Sometimes, a single lexer is insufficient to properly handle complex grammars. To address this, many lexer generators offer the ability to have separate lexers with their own set of patterns and tokens, allowing you to dynamically switch between them based on the context.
 4 | 
 5 | In Logos, context switching is handled using the [`morph`](https://docs.rs/logos/0.11.0-rc2/logos/struct.Lexer.html#method.morph) method of the `logos::Lexer` struct.
 6 | This method takes ownership of the current lexer and transforms it into a lexer for a new token type.
 7 | 
 8 | It is important to note that:
 9 | 
10 | - Both the original lexer and the new lexer must share the same [`Source`](./attributes/logos.md#custom-source-type) type.
11 | - The [`Extras`](./extras.md) type from the original lexer must be convertible into the `Extras` type of the new lexer.
12 | 
13 | ## Example
14 | 
15 | The following example demonstrates how to use `morph` to handle a C-style language that also supports python blocks:
16 | 
17 | ```rust
18 | #[derive(Logos, Debug, PartialEq, Clone)]
19 | #[logos(skip r"\s+")]
20 | enum CToken {
21 |     /* Tokens supporting C syntax */
22 |     // ...
23 |     #[regex(r#"extern\s+"python"\s*\{"#, python_block_callback)]
24 |     PythonBlock(Vec<PythonToken>),
25 | }
26 | 
27 | #[derive(Logos, Debug, PartialEq, Clone)]
28 | #[logos(skip r"\s+")]
29 | enum PythonToken {
30 |     #[token("}")]
31 |     ExitPythonBlock,
32 |     /* Tokens supporting Python syntax */
33 |     // ...
34 | }
35 | 
36 | fn python_block_callback(lex: &mut Lexer<CToken>) -> Option<Vec<PythonToken>> {
37 |     let mut python_lexer = lex.clone().morph::<PythonToken>();
38 |     let mut tokens = Vec::new();
39 |     while let Some(token) = python_lexer.next() {
40 |         match token {
41 |             Ok(PythonToken::ExitPythonBlock) => break,
42 |             Err(_) => return None,
43 |             Ok(tok) => tokens.push(tok),
44 |         }
45 |     }
46 |     *lex = python_lexer.morph();
47 |     Some(tokens)
48 | }
49 | ```
50 | 
51 | Note that if we want to use `morph` inside a callback we need to be able to clone the original lexer, as `morph` needs to take ownership but the callback receives only a reference to the lexer.
52 | 
53 | For a more in depth example check out [String interpolation](./examples/string-interpolation.md).
54 | 


--------------------------------------------------------------------------------
/book/src/contributing.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | If you are considering to contribute to Logos, then this place it for you!
 4 | 
 5 | First, we really appreciate people that can help this project grow, and we
 6 | would like to guide you through the standard contribution process.
 7 | 
 8 | There are many ways to help us, and here is a short list of some of them:
 9 | 
10 | + fixing an [BUG](https://github.com/maciejhirsz/logos/labels/bug), by providing
11 |   a patch (or suggesting in the comments how one could fix it);
12 | + correcting some typos in the documentation, the book, or anywhere else;
13 | + raising an issue about a problem (i.e.,
14 |   [opening an issue](https://github.com/maciejhirsz/logos/issues/new) on GitHub);
15 | + proposing new features (either with
16 |   [an issue](https://github.com/maciejhirsz/logos/issues/new) or
17 |   [a pull request](https://github.com/maciejhirsz/logos/pulls) on GitHub);
18 | + or improving the documentation (either in the crate or in the book).
19 | 
20 | In any case, GitHub is the place-to-go for anything related to contributing.
21 | 
22 | Below, we provide a few help pages (or links) to contents that can help you
23 | understand Logos' internals and how you can create submit a contribution.
24 | 
25 | + If you are new to GitHub or git, please consider reading those two guides:
26 |   + [GitHub’s Hello World](https://docs.github.com/en/get-started/quickstart/hello-world);
27 |   + and [GitHub Pull Request in 100 Seconds](https://www.youtube.com/watch?v=8lGpZkjnkt4&ab_channel=Fireship)
28 |     (video).
29 | + To setup and test your code locally, see the [Setup](./contributing/setup.md)
30 |   page.
31 | + To know a bit more how Logos works, check the
32 |   [Internals](./contributing/internals.md).
33 | 


--------------------------------------------------------------------------------
/book/src/contributing/fuzzing.md:
--------------------------------------------------------------------------------
 1 | # Fuzzing
 2 | 
 3 | 
 4 | Fuzzing is a technique to test a piece of software by injecting randomly generated inputs. This can be pretty useful to discover bugs, as pointed out in [#407](https://github.com/maciejhirsz/logos/pull/407).
 5 | 
 6 | **Logos**' fuzzing crate is powered by [afl.rs](https://github.com/rust-fuzz/afl.rs) that
 7 | finds panics in **Logos**' methods.
 8 | 
 9 | ## Usage
10 | 
11 | First, make sure you have `cargo-afl` installed,
12 | [see the rust-fuzz afl setup guide for installation information](https://rust-fuzz.github.io/book/afl/setup.html).
13 | 
14 | Next, change your current working directory to be the `fuzz` folder.
15 | 
16 | ### Building
17 | 
18 | Before fuzzing, you need to build the target with:
19 | 
20 | ```bash
21 | cargo afl build
22 | ```
23 | 
24 | ### Fuzzy testing
25 | 
26 | The recommended way the run tests is with:
27 | 
28 | ```bash
29 | cargo afl fuzz -i in -o out ../target/debug/logos-fuzz
30 | ```
31 | 
32 | Note that it may run for a (very) long time before
33 | it encounter any bug.
34 | 
35 | ## Replaying a Crash
36 | 
37 | If you happen to find a bug that crashes the program,
38 | you can reply it with
39 | 
40 | ```bash
41 | cargo afl run logos-fuzz < out/default/crashes/crash_file
42 | ```
43 | 
44 | ### Reporting a Bug
45 | 
46 | If you encounter a crash and you feel the error message
47 | is not appropriate,
48 | please report it by opening
49 | [an issue](https://github.com/maciejhirsz/logos/issues/new).
50 | Don't forget to include your crash file so we can later
51 | reproduce it.
52 | 


--------------------------------------------------------------------------------
/book/src/contributing/internals.md:
--------------------------------------------------------------------------------
 1 | # Internals
 2 | 
 3 | **Logos**' core functionalities are split across five crates:
 4 | 
 5 | - `logos` is the main crate, that you add to your project (in `Cargo.toml`)
 6 |   to obtain the `Logos` derive macro. The public API is limited to this crate,
 7 |   and most users should only use this crate, not the others.
 8 | - `logos-derive` is a very simple but necessary crate to expose `logos-codegen`'s code as a derive macro.
 9 | - `logos-codegen` contains the most technical parts of **Logos**: the code
10 |   that **reads** your token definitions, and **generates** optimized code
11 |   to create blazingly fast lexers.
12 |   You can [read a blog post](https://maciej.codes/2020-04-19-stacking-luts-in-logos.html)
13 |   from the author of **Logos** to get a small insight of what the
14 |   `logos-codegen` crate does. In the future, we hope to provide more documents
15 |   about how this crate works, so people are more likely to understand it and
16 |   improve it with pull requests (see the
17 |   [Contributing section](../contributing.md)).
18 | - `logos-cli` is a separate crate, that installs a binary of the same name,
19 |   and allows to expand the `Logos` derive macro into code.
20 |   It can be installed with `cargo install logos-cli`,
21 |   and usage help can be obtained through the `logos-cli --help` command.
22 |   This tool can be useful if your token definitions stay constant, and
23 |   you want to reduce compilation time overhead caused by derive macros.
24 | - `logos-fuzz` is an internal crate (i.e., unpublished) that uses [afl.rs](https://github.com/rust-fuzz/afl.rs)
25 |   to find confusing panics before they reach the developer.
26 |   To use this tool, see the [Fuzzing guide]('./fuzzing.md')
27 | 


--------------------------------------------------------------------------------
/book/src/contributing/setup.md:
--------------------------------------------------------------------------------
  1 | # Setup
  2 | 
  3 | On this page, you will find all the information needed to run and test your
  4 | own version of the Logos crate, locally.
  5 | 
  6 | We assume you have basic knowledge with git and GitHub. If that is not the
  7 | case, please refer to the link mentioned in [Contributing](./contributing.md).
  8 | 
  9 | ## Prerequisites
 10 | 
 11 | You need to have both git and Rust installed on your computer,
 12 | see installation procedures:
 13 | 
 14 | + for [git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git);
 15 | + and [Rust](https://www.rust-lang.org/tools/install).
 16 | 
 17 | Once it's done, clone the Logos repository on your computer:
 18 | 
 19 | ```bash
 20 | git clone https://github.com/maciejhirsz/logos.git
 21 | ```
 22 | 
 23 | If you have a fork of this repository, make sure to clone it instead.
 24 | 
 25 | Finally, launch a terminal (i.e., command-line) session and go to the
 26 | `logos` directory.
 27 | 
 28 | ## Checking the code compiles
 29 | 
 30 | A good way to see if you code can compile is to use the eponym command:
 31 | 
 32 | ```bash
 33 | cargo check --workspace
 34 | ```
 35 | 
 36 | ## Formatting and linting your code
 37 | 
 38 | Prior to suggesting changes in a pull request, it is important to both
 39 | format your code:
 40 | 
 41 | ```bash
 42 | cargo fmt
 43 | ```
 44 | 
 45 | and check against Rust's linter:
 46 | 
 47 | ```bash
 48 | cargo clippy
 49 | ```
 50 | 
 51 | Make sure to run those frequently, otherwise your pull request will probably
 52 | fail to pass the automated tests.
 53 | 
 54 | ## Testing your code
 55 | 
 56 | A code that compiles isn't necessarily correct, and testing it against known
 57 | cases is of good practice:
 58 | 
 59 | ```bash
 60 | cargo test --workspace
 61 | ```
 62 | 
 63 | You can also run benchmarks:
 64 | 
 65 | ```bash
 66 | cargo bench --workspace --benches
 67 | ```
 68 | 
 69 | ## Building the documentation
 70 | 
 71 | Logos' documentation needs to be built with Rust's nightly toolchain.
 72 | 
 73 | You can install the latest nightly channel with:
 74 | 
 75 | ```bash
 76 | rustup install nightly
 77 | ```
 78 | 
 79 | Then, use the following command to build the documentation with a similar
 80 | configuration to the one used by [docs.rs](https://docs.rs/logos/latest/logos/):
 81 | 
 82 | ```bash
 83 | RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc \
 84 |     --features debug \
 85 |     -Zunstable-options \
 86 |     -Zrustdoc-scrape-examples \
 87 |     --no-deps \
 88 |     --open \
 89 | ```
 90 | 
 91 | 
 92 | ## Building the book
 93 | 
 94 | Logos' book can be built with mbBook.
 95 | 
 96 | This tool can be installed with `cargo`:
 97 | 
 98 | ```bash
 99 | cargo install mdbook
100 | ```
101 | 
102 | You also need to install `mdbook-admonish` and its assets:
103 | 
104 | ```bash
105 | cargo install mdbook-admonish
106 | cd book/  # You must run the next command from the book/ directory
107 | mdbook-admonish install
108 | cd ../ # Back to project root
109 | ```
110 | 
111 | Then, you can build the book with:
112 | 
113 | ```bash
114 | mdbook serve book --open
115 | ```
116 | 
117 | Any change in the `./book` folder will automatically trigger a new build,
118 | and the pages will be live-reloaded.
119 | 


--------------------------------------------------------------------------------
/book/src/debugging.md:
--------------------------------------------------------------------------------
  1 | # Debugging
  2 | 
  3 | Instructions on how to debug your Logos lexer.
  4 | 
  5 | ## Visualizing Logos Graph 
  6 | 
  7 | Logos works by creating a graph that gets derived from
  8 | the tokens that you defined.
  9 | This graph describes how the lexer moves through different
 10 | states when processing input.
 11 | 
 12 | Hence, it may be beneficial during debugging to be able to
 13 | visualize this graph, to understand how Logos will match the various tokens. 
 14 | 
 15 | If we take this example:
 16 | 
 17 | ```rust,no_run,noplayground
 18 | use logos::Logos;
 19 | 
 20 | #[derive(Debug, Logos, PartialEq)]
 21 | enum Token {
 22 |     // Tokens can be literal strings, of any length.
 23 |     #[token("fast")]
 24 |     Fast,
 25 | 
 26 |     #[token(".")]
 27 |     Period,
 28 | 
 29 |     // Or regular expressions.
 30 |     #[regex("[a-zA-Z]+")]
 31 |     Text,
 32 | }
 33 | fn main() {
 34 |     let input = "Create ridiculously fast Lexers.";
 35 | 
 36 |     let mut lexer = Token::lexer(input);
 37 |     while let Some(token) = lexer.next() {
 38 |         println!("{:?}", token);
 39 |     }
 40 | }
 41 | ```
 42 | 
 43 | Logos actually constructs a graph that contains the logic for matching tokens:
 44 | 
 45 | ```
 46 | graph = {
 47 |     1: ::Fast,
 48 |     2: ::Period,
 49 |     3: ::Text,
 50 |     4: {
 51 |         [A-Z] ⇒ 4,
 52 |         [a-z] ⇒ 4,
 53 |         _ ⇒ 3,
 54 |     },
 55 |     7: [
 56 |         ast ⇒ 8,
 57 |         _ ⇒ 4*,
 58 |     ],
 59 |     8: {
 60 |         [A-Z] ⇒ 4,
 61 |         [a-z] ⇒ 4,
 62 |         _ ⇒ 1,
 63 |     },
 64 |     9: {
 65 |         . ⇒ 2,
 66 |         [A-Z] ⇒ 4,
 67 |         [a-e] ⇒ 4,
 68 |         f ⇒ 7,
 69 |         [g-z] ⇒ 4,
 70 |     },
 71 | }
 72 | ```
 73 | This graph can help us understand how our patterns are matched,
 74 | and maybe understand why we have a bug at some point.
 75 | 
 76 | Let's get started by trying to understand how Logos is matching the
 77 | `.` character, which we've tokenized as `Token::Period`.
 78 | 
 79 | We can begin our search by looking at number `9` for the character `.`.
 80 | We can see that if Logos matches a `.` it will jump `=>` to number `2`.
 81 | We can then follow that by looking at `2` which resolves to our `::Period` token. 
 82 | 
 83 | Logos will then continue to look for any matches past our `.` character.
 84 | This is required in case there is potential continuation after the `.` character.
 85 | Although, in the *input* we provided, there are no any additional characters,
 86 | since it is the end of our input.
 87 | 
 88 | We also can try to identify how the token `fast` works by looking at `9`,
 89 | first, and seeing that `f` will cause Logos to jump to `7`.
 90 | This will then resolve the last letters of our word *fast* by matching `ast`
 91 | which jumps to `8`. Since our provided _input_ to the lexer does not include
 92 | alphabetic characters after the word "fast", but rather a whitespace,
 93 | the token `::Fast` will be recognized.
 94 | Then, the graph will look for further potential continuation (here, `[g-z] => 4`)
 95 | 
 96 | ## Enabling 
 97 | 
 98 | To enable debugging output you can define a `debug` feature in your
 99 | `Cargo.toml` file, like this:
100 | 
101 | ```
102 | // Cargo.toml
103 | [dependencies]
104 | logos = { version = "1.2.3", features = ["debug"] }
105 | ```
106 | 
107 | Next, you can build your project with `cargo build` and
108 | the output will contain a debug representation of your graph(s).
109 | 


--------------------------------------------------------------------------------
/book/src/examples.md:
--------------------------------------------------------------------------------
 1 | # Examples
 2 | 
 3 | The following examples are ordered by increasing level of complexity.
 4 | 
 5 | **[Brainfuck interpreter](./examples/brainfuck.md)**: Lexers are very powerful tools for parsing code programs into meaningful instructions. We show you how you can build an interpreter for the Brainfuck programming language under 100 lines of code!
 6 | 
 7 | **[Simple calculator](./examples/calculator.md)**: For a relatively large domain-specifc language (DSL), or any programming language, implementing an interpreter typically involves converting the tokens generated by a lexer into an abstract syntax tree (AST) via a parser, and then evaluating it. We show you how you can build a simple calculator that evaluates arithmetic expressions by combining Logos and a parser generator library.
 8 | 
 9 | **[JSON parser](./examples/json.md)**: We present a JSON parser written with Logos that does nice error reporting when invalid values are encountered.
10 | 
11 | **[JSON-borrowed parser](./examples/json_borrowed.md)**: A variant of the previous parser, but that does not own its data.
12 | 
13 | **[String interpolation](./examples/string-interpolation.md)**: Example on using context-dependent lexing to parse a simple language with string interpolation.
14 | 


--------------------------------------------------------------------------------
/book/src/examples/brainfuck.md:
--------------------------------------------------------------------------------
 1 | # Brainfuck interpreter
 2 | 
 3 | In most programming languages, commands can be made of multiple program tokens, where a token is simply string slice that has a particular meaning for the language. For example, in Rust, the function signature `pub fn main()` could be split by the **lexer** into tokens `pub`, `fn`, `main`, `(`, and `)`. Then, the **parser** combines tokens into meaningful program instructions.
 4 | 
 5 | However, there exists programming languages that are so simple, such as Brainfuck, that each token can be mapped to a single instruction. There are actually 8 single-characters tokens:
 6 | 
 7 | ```rust,no_run,noplayground
 8 | {{#include ../../../examples/brainfuck.rs:tokens}}
 9 | ```
10 | 
11 | All other characters must be ignored.
12 | 
13 | Once the tokens are obtained, a Brainfuck interpreter can be easily created using a [Finite-state machine](https://en.wikipedia.org/wiki/Finite-state_machine). For the sake of simpliciy, we collected all the tokens into one vector called `operations`.
14 | 
15 | Now, creating an interpreter becomes straightforward[^1]:
16 | ```rust,no_run,noplayground
17 | {{#include ../../../examples/brainfuck.rs:fsm}}
18 | ```
19 | 
20 | [^1]: There is a small trick to make it easy. As it can be seen in the full code, we first perform a check that all beginning loops (`'['`) have a matching end (`']'`). This way, we can create two maps, `pairs` and `pairs_reverse`, to easily jump back and forth between them.
21 | 
22 | Finally, we provide you the full code that you should be able to run with[^2]:
23 | ```bash
24 | cargo run --example brainfuck examples/hello_word.bf
25 | ```
26 | 
27 | [^2]: You first need to clone [this repository](https://github.com/maciejhirsz/logos).
28 | 
29 | ```rust,no_run,noplayground
30 | {{#include ../../../examples/brainfuck.rs:all}}
31 | ```
32 | 


--------------------------------------------------------------------------------
/book/src/examples/calculator.md:
--------------------------------------------------------------------------------
  1 | # Simple calculator
  2 | 
  3 | <sub>This page (including the images) was contributed by [ynn](https://github.com/your-diary).</sub>
  4 | 
  5 | When you implement an interpreter for a [domain-specific language (DSL)](https://en.wikipedia.org/wiki/Domain-specific_language), or any programming language, the process typically involves the following steps:
  6 | 
  7 | 1. **Lexing**: Splitting the input stream (i.e., source code string) into tokens via a lexer.
  8 | 
  9 | 2. **Parsing**: Converting the tokens into an [abstract syntax tree (AST)](https://en.wikipedia.org/wiki/Abstract_syntax_tree) via a parser.
 10 | 
 11 | 3. **Evaluation**: Evaluating the AST to produce the result.
 12 | 
 13 | In this example, we implement a simple calculator that evaluates arithmetic expressions such as `1 + 2 * 3` or `((1 + 2) * 3 + 4) * 2 + 4 / 3`.
 14 | 
 15 | We use `logos` as the lexer generator and [`chumsky`](https://github.com/zesterer/chumsky) as the parser generator.
 16 | 
 17 | ![flow chart](/assets/calculator_example_flow.png)
 18 | 
 19 | ## 1. Try It
 20 | 
 21 | Before diving into the implementation details, let's play with it[^1].
 22 | 
 23 | ```bash
 24 | $ cargo run --example calculator '1 + 7 * (3 - 4) / 2'
 25 | ```
 26 | 
 27 | [^1]: You first need to clone [this repository](https://github.com/maciejhirsz/logos).
 28 | 
 29 | **Output**:
 30 | 
 31 | ```
 32 | [AST]
 33 | Add(
 34 |     Int(
 35 |         1,
 36 |     ),
 37 |     Div(
 38 |         Mul(
 39 |             Int(
 40 |                 7,
 41 |             ),
 42 |             Sub(
 43 |                 Int(
 44 |                     3,
 45 |                 ),
 46 |                 Int(
 47 |                     4,
 48 |                 ),
 49 |             ),
 50 |         ),
 51 |         Int(
 52 |             2,
 53 |         ),
 54 |     ),
 55 | )
 56 | 
 57 | [result]
 58 | -2
 59 | ```
 60 | 
 61 | ~~~admonish note title="Full Code" collapsible=true
 62 | 
 63 | ```rust,no_run,noplayground
 64 | {{#include ../../../examples/calculator.rs:all}}
 65 | ```
 66 | 
 67 | ~~~
 68 | 
 69 | ## 2. Lexer
 70 | 
 71 | Our calculator supports the following tokens:
 72 | 
 73 | - Integer literals: `0`, `1`, `15`, etc;
 74 | 
 75 | - Unary operator: `-`;
 76 | 
 77 | - Binary operators: `+`, `-`, `*`, `/`;
 78 | 
 79 | - Parenthesized expressions: `(3 + 5) * 2`, `((1 + 2) * 3 + 4) * 2 + 3 / 2`, etc.
 80 | 
 81 | ```rust,no_run,noplayground
 82 | {{#include ../../../examples/calculator.rs:tokens}}
 83 | ```
 84 | 
 85 | ## 3. Parser
 86 | 
 87 | While it is easy enough to manually implement a parser in this case (e.g., [Pratt parsing](https://en.wikipedia.org/wiki/Operator-precedence_parser#Pratt_parsing)), let's just use [`chumsky`](https://github.com/zesterer/chumsky) crate, which is one of the most popular parser generator libraries in Rust.
 88 | 
 89 | ### 3.1 AST Definition
 90 | 
 91 | First, we define the AST.
 92 | 
 93 | ```rust,no_run,noplayground
 94 | {{#include ../../../examples/calculator.rs:ast}}
 95 | ```
 96 | 
 97 | Note that
 98 | 
 99 | - We name the enum not `AST` but `Expr` because an AST is just nested expressions.
100 | 
101 | - There is no `Parenthesized` variant because parentheses only affect the order of operations (i.e., precedence), which is reflected in the AST structure.
102 | 
103 | - `Box` is used as [a recursive enum is not allowed in Rust](https://stackoverflow.com/questions/25296195/why-are-recursive-struct-types-illegal-in-rust).
104 | 
105 | ### 3.2 Parser Implementation
106 | 
107 | Next, we define the parser. The code may look a bit complicated if you are not familiar with parser combinator libraries, but it is actually quite simple. See [Chumsky's official tutorial](https://github.com/zesterer/chumsky/blob/main/tutorial.md) for the details.
108 | 
109 | ```rust,no_run,noplayground
110 | {{#include ../../../examples/calculator.rs:parser}}
111 | ```
112 | 
113 | ## 4. Evaluator
114 | 
115 | Evaluating the AST is straightforward. We just implement it using [depth-first search (DFS)](https://en.wikipedia.org/wiki/Depth-first_search) such that the mathematical operations are processed in the correct order.
116 | 
117 | ```rust,no_run,noplayground
118 | {{#include ../../../examples/calculator.rs:evaluator}}
119 | ```
120 | 
121 | **Example**
122 | 
123 | Evaluating `1 + 3 * 12` will proceed as below.
124 | 
125 | ![how evaluator works](/assets/calculator_example_how_evaluator_works.png)
126 | 
127 | ## 5. `main()` Function
128 | 
129 | Finally, we put everything together in the `main()` function.
130 | 
131 | ```rust,no_run,noplayground
132 | {{#include ../../../examples/calculator.rs:main}}
133 | ```
134 | 
135 | ## 6. Extend the Calculator
136 | 
137 | Now that you've implemented a basic calculator, try extending its functionality with the following tasks:
138 | 
139 | - **Handle zero-division gracefully**: The current evaluator panics when zero-division occurs. Change the return type of the evaluator from `isize` to `Result<isize, String>`, making it possible to return an error message.
140 | 
141 | - **Add support for the modulo operator (`%`)**: Update the lexer, parser, and evaluator to handle expressions like `10 % 3`.
142 | 
143 | - **Add support for built-in functions**: Implement built-in functions such as `abs(x)`, `pow(x, y)` or `rand()`.
144 | 


--------------------------------------------------------------------------------
/book/src/examples/json.md:
--------------------------------------------------------------------------------
 1 | # JSON parser
 2 | 
 3 | JSON is a widely used format for exchanging data between formats, while being human-readable.
 4 | 
 5 | Possible values are defined recursively and can be any of the following:
 6 | 
 7 | ```rust,no_run,noplayground
 8 | {{#include ../../../examples/json.rs:values}}
 9 | ```
10 | 
11 | Object are delimited with braces `{` and `}`, arrays with brackets `[` and `]`, and values with commas `,`. Newlines, tabs or spaces should be ignored by the lexer.
12 | 
13 | Knowing that, we can construct a lexer with `Logos` that will identify all those cases:
14 | 
15 | ```rust,no_run,noplayground
16 | {{#include ../../../examples/json.rs:tokens}}
17 | ```
18 | 
19 | ```admonish note
20 | The hardest part is to define valid regexes for `Number` and `String` variants.
21 | The present solution was inspired by
22 | [this stackoverflow thread](https://stackoverflow.com/questions/32155133/regex-to-match-a-json-string)
23 | and checked against [the JSON specification](https://www.json.org/json-en.html).
24 | ```
25 | 
26 | Once we have our tokens, we must parse them into actual JSON values. We will proceed be creating 3 functions:
27 | 
28 | + `parse_value` for parsing any JSON object, without prior knowledge of its type;
29 | + `parse_array` for parsing an array, assuming we matched `[`;
30 | + and `parse_object` for parsing an object, assuming we matched `{`.
31 | 
32 | Starting with parsing an arbitrary value, we can easily obtain the four scalar types, `Bool`, `Null`, `Number`, and `String`, while we will call the next functions for arrays and objects parsing.
33 | 
34 | ```rust,no_run,noplayground
35 | {{#include ../../../examples/json.rs:value}}
36 | ```
37 | 
38 | To parse an array, we simply loop between tokens, alternating between parsing values and commas, until a closing bracket is found.
39 | 
40 | ```rust,no_run,noplayground
41 | {{#include ../../../examples/json.rs:array}}
42 | ```
43 | 
44 | A similar approach is used for objects, where the only difference is that we expect (key, value) pairs, separated by a colon.
45 | 
46 | ```rust,no_run,noplayground
47 | {{#include ../../../examples/json.rs:object}}
48 | ```
49 | 
50 | Finally, we provide you the full code that you should be able to run with[^1]:
51 | ```bash
52 | cargo run --example json examples/example.json
53 | ```
54 | 
55 | [^1] You first need to clone [this repository](https://github.com/maciejhirsz/logos).
56 | 
57 | ```rust,no_run,noplayground
58 | {{#include ../../../examples/json.rs:all}}
59 | ```
60 | 


--------------------------------------------------------------------------------
/book/src/examples/json_borrowed.md:
--------------------------------------------------------------------------------
 1 | # JSON parser with borrowed values
 2 | 
 3 | The previous parser owned its data by allocating strings. This can require quite
 4 | some memory space, and using borrowed string slices can help use saving space, while
 5 | also maybe increasing performances.
 6 | 
 7 | If you are familiar with Rust's concept of lifetimes,
 8 | using `&str` string slices instead of owned `String`
 9 | is straightforward:
10 | 
11 | ```diff
12 | @ 33c29
13 | - enum Token {
14 | + enum Token<'source> {
15 | @ 62,63c58,59
16 | -     #[regex(r#""([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*""#, |lex| lex.slice().to_owned())]
17 | -     String(String),
18 | +     #[regex(r#""([^"\\\x00-\x1F]|\\(["\\bnfrt/]|u[a-fA-F0-9]{4}))*""#, |lex| lex.slice())]
19 | +     String(&'source str),
20 | @ 70c66
21 | - enum Value {
22 | + enum Value<'source> {
23 | @ 78c74
24 | -     String(String),
25 | +     String(&'source str),
26 | @ 80c76
27 | -     Array(Vec<Value>),
28 | +     Array(Vec<Value<'source>>),
29 | @ 82c78
30 | -     Object(HashMap<String, Value>),
31 | +     Object(HashMap<&'source str, Value<'source>>),
32 | @ 88c84
33 | - fn parse_value<'source>(lexer: &mut Lexer<'source, Token>) -> Result<Value> {
34 | + fn parse_value<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> {
35 | @ 113c109
36 | - fn parse_array<'source>(lexer: &mut Lexer<'source, Token>) -> Result<Value> {
37 | + fn parse_array<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> {
38 | @ 167c163
39 | - fn parse_object<'source>(lexer: &mut Lexer<'source, Token>) -> Result<Value> {
40 | + fn parse_object<'source>(lexer: &mut Lexer<'source, Token<'source>>) -> Result<Value<'source>> {
41 | ```
42 | 
43 | The above code shows the lines you need to change from the previous example
44 | to use borrowed data.
45 | 
46 | Finally, we provide you the full code that you should be able to run with[^1]:
47 | ```bash
48 | cargo run --example json-borrowed examples/example.json
49 | ```
50 | 
51 | [^1] You first need to clone [this repository](https://github.com/maciejhirsz/logos).
52 | 
53 | ```rust,no_run,noplayground
54 | {{#include ../../../examples/json_borrowed.rs:all}}
55 | ```
56 | 


--------------------------------------------------------------------------------
/book/src/extras.md:
--------------------------------------------------------------------------------
 1 | # Using `Extras`
 2 | 
 3 | When deriving the `Logos` traits, you may want to convey some internal state
 4 | between your tokens. That is where `Logos::Extras` comes to the rescue.
 5 | 
 6 | Each `Lexer` has a public field called `extras` that can be accessed and
 7 | mutated to keep track and modify some internal state. By default,
 8 | this field is set to `()`, but its type can by modified using the derive
 9 | attribute `#[logos(extras = <some type>)]` on your `enum` declaration.
10 | 
11 | For example, one may want to know the location, both line and column indices,
12 | of each token. This is especially useful when one needs to report an erroneous
13 | token to the user, in an user-friendly manner.
14 | 
15 | ```rust,no_run,noplayground
16 | {{#include ../../examples/extras.rs:tokens}}
17 | ```
18 | 
19 | The above token definition will hold two tokens: `Newline` and `Word`.
20 | The former is only used to keep track of the line numbering and will be skipped
21 | using `Skip` as a return value from its callback function. The latter will be
22 | a word with `(line, column)` indices.
23 | 
24 | To make it easy, the lexer will contain the following two extras:
25 | 
26 | + `extras.0`: the line number;
27 | + `extras.1`: the char index of the current line.
28 | 
29 | We now have to define the two callback functions:
30 | 
31 | ```rust,no_run,noplayground
32 | {{#include ../../examples/extras.rs:callbacks}}
33 | ```
34 | 
35 | Extras can of course be used for more complicate logic, and there is no limit
36 | to what you can store within the public `extras` field.
37 | 
38 | Finally, we provide you the full code that you should be able to run with[^1]:
39 | ```bash
40 | cargo run --example extras Cargo.toml
41 | ```
42 | 
43 | [^1] You first need to clone [this repository](https://github.com/maciejhirsz/logos).
44 | 
45 | ```rust,no_run,noplayground
46 | {{#include ../../examples/extras.rs:all}}
47 | ```
48 | 


--------------------------------------------------------------------------------
/book/src/getting-help.md:
--------------------------------------------------------------------------------
 1 | # Getting Help
 2 | 
 3 | If you need help using **Logos**, there are three places you can go to depending
 4 | on what you are looking for:
 5 | 
 6 | + [this book](./) for a documented walk through **Logos**' usage, with detailed
 7 |   examples, and more. A **must read** for any newcomer;
 8 | + [the API documentation](https://docs.rs/logos/latest/logos/) to obtain precise
 9 |   information about function signatures and what the Logos crate exposes in
10 |   terms of features;
11 | + and [GitHub issues](https://github.com/maciejhirsz/logos/issues) for anything
12 |   else that is not covered by any of the two above.
13 | 
14 | Regarding [GitHub issues](https://github.com/maciejhirsz/logos/issues),
15 | it's highly recommended to first check if another issue, either open or closed,
16 | already covers the topic you are looking for. If not, then consider creating a
17 | new issue with necessary information about your question, problem or else.
18 | 


--------------------------------------------------------------------------------
/book/src/getting-started.md:
--------------------------------------------------------------------------------
 1 | # Getting Started
 2 | 
 3 | **Logos** can be included in your Rust project using the `cargo add logos` command, or by directly modifying your `Cargo.toml` file:
 4 | 
 5 | ```toml
 6 | [dependencies]
 7 | logos = "0.15.0"
 8 | ```
 9 | 
10 | Then, you can automatically derive the [`Logos`](https://docs.rs/logos/latest/logos/trait.Logos.html) trait on your `enum` using the `Logos` derive macro:
11 | 
12 | ```rust,no_run,no_playground
13 | use logos::Logos;
14 | 
15 | #[derive(Logos, Debug, PartialEq)]
16 | #[logos(skip r"[ \t\n\f]+")] // Ignore this regex pattern between tokens
17 | enum Token {
18 |     // Tokens can be literal strings, of any length.
19 |     #[token("fast")]
20 |     Fast,
21 | 
22 |     #[token(".")]
23 |     Period,
24 | 
25 |     // Or regular expressions.
26 |     #[regex("[a-zA-Z]+")]
27 |     Text,
28 | }
29 | ```
30 | 
31 | Then, you can use `Logos::lexer` method to turn any `&str` into an iterator of tokens[^1]:
32 | 
33 | ```rust,no_run,no_playground
34 | let mut lex = Token::lexer("Create ridiculously fast Lexers.");
35 | 
36 | assert_eq!(lex.next(), Some(Ok(Token::Text)));
37 | assert_eq!(lex.span(), 0..6);
38 | assert_eq!(lex.slice(), "Create");
39 | 
40 | assert_eq!(lex.next(), Some(Ok(Token::Text)));
41 | assert_eq!(lex.span(), 7..19);
42 | assert_eq!(lex.slice(), "ridiculously");
43 | 
44 | assert_eq!(lex.next(), Some(Ok(Token::Fast)));
45 | assert_eq!(lex.span(), 20..24);
46 | assert_eq!(lex.slice(), "fast");
47 | 
48 | assert_eq!(lex.next(), Some(Ok(Token::Text)));
49 | assert_eq!(lex.slice(), "Lexers");
50 | assert_eq!(lex.span(), 25..31);
51 | 
52 | assert_eq!(lex.next(), Some(Ok(Token::Period)));
53 | assert_eq!(lex.span(), 31..32);
54 | assert_eq!(lex.slice(), ".");
55 | 
56 | assert_eq!(lex.next(), None);
57 | ```
58 | 
59 | [^1]: Each item is actually a [`Result<Token, _>`](https://docs.rs/logos/latest/logos/struct.Lexer.html#associatedtype.Item), because the lexer returns an error if some part of the string slice does not match any variant of `Token`.
60 | 
61 | Because [`Lexer`](https://docs.rs/logos/latest/logos/struct.Lexer.html), returned by [`Logos::lexer`](https://docs.rs/logos/latest/logos/trait.Logos.html#method.lexer), implements the `Iterator` trait, you can use a `for .. in` construct:
62 | 
63 | ```rust,no_run,no_playground
64 | for result in Token::lexer("Create ridiculously fast Lexers.") {
65 |     match result {
66 |         Ok(token) => println!("{:#?}", token),
67 |         Err(e) => panic!("some error occurred: {}", e),
68 |     }
69 | }
70 | ```
71 | 


--------------------------------------------------------------------------------
/book/src/intro.md:
--------------------------------------------------------------------------------
 1 | # Logos Handbook
 2 | 
 3 | [![Crates.io version shield](https://img.shields.io/crates/v/logos.svg)](https://crates.io/crates/logos)
 4 | [![Docs](https://docs.rs/logos/badge.svg)](https://docs.rs/logos)
 5 | [![Crates.io license shield](https://img.shields.io/crates/l/logos.svg)](https://crates.io/crates/logos)
 6 | 
 7 | <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
 8 | 
 9 | Hi there!
10 | 
11 | **Logos** is a fast and easy to use [lexer](https://en.wikipedia.org/wiki/Lexical_analysis)
12 | generator written in Rust. While Rust has excellent documentation tools (and you can access
13 | the [API docs for Logos at docs.rs](https://docs.rs/logos/)), it's not the easiest thing to
14 | document custom syntax used by procedural macros, of which Logos has a bit. This Handbook
15 | seeks to remedy this!
16 | 
17 | ## In a nut shell
18 | 
19 | There are two main types in **Logos**:
20 | 
21 | + The `Logos` trait, which comes out with it's own derive macro. The derive
22 |   macro uses custom attributes (the things using these brackets: `#[...]`)
23 |   with plain string or [regular expression](https://en.wikipedia.org/wiki/Regular_expression)
24 |   syntax on `enum` variants as _patterns_ for some input.
25 | + The `Lexer<T: Logos>`, which is an iterator that takes some input (`&str`,
26 |   sometimes `&[u8]`) and performs lexical analysis on the input on the go,
27 |   producing variants of the enum `T` matching the defined patterns.
28 | 


--------------------------------------------------------------------------------
/book/src/token-disambiguation.md:
--------------------------------------------------------------------------------
 1 | # Token disambiguation
 2 | 
 3 | When two or more tokens can match a given sequence, **Logos** compute the
 4 | priority of each pattern (`#[token]` or `#[regex]`), and use that priority
 5 | to decide which pattern should match.
 6 | 
 7 | The rule of thumb is:
 8 | 
 9 | + Longer beats shorter.
10 | + Specific beats generic.
11 | 
12 | If any two definitions could match the same input, like `fast` and `[a-zA-Z]+`
13 | in the example above, it's the longer and more specific definition of `Token::Fast`
14 | that will be the result.
15 | 
16 | This is done by comparing numeric priority attached to each definition. Every
17 | consecutive, non-repeating single byte adds 2 to the priority, while every range
18 | or regex class adds 1.
19 | Loops or optional blocks are ignored, while alternations count the shortest alternative:
20 | 
21 | + `[a-zA-Z]+` has a priority of 2 (lowest possible), because at minimum it can
22 |   match a single byte to a class;
23 | + `foobar` has a priority of 12;
24 | + and `(foo|hello)(bar)?` has a priority of 6, `foo` being it's shortest possible match.
25 | 
26 | Generally speaking, equivalent regex patterns have the same priority. E.g.,
27 | `a|b` is equivalent to `[a-b]`, and both have a priority of 2.
28 | 
29 | ```admonish info
30 | When two different patterns have the same priority,
31 | **Logos** will issue an compilation error.
32 | To prevent this from happening, you can manually set the priority of a given
33 | pattern with, e.g., `#[token("foobar", priority = 20)]`.
34 | ```
35 | 


--------------------------------------------------------------------------------
/book/src/unsafe.md:
--------------------------------------------------------------------------------
 1 | # Unsafe Code
 2 | 
 3 | By default, **Logos** uses unsafe code to avoid unnecessary bounds checks while
 4 | accessing slices of the input `Source`.
 5 | 
 6 | This unsafe code also exists in the code generated by the `Logos` derive macro,
 7 | which generates a deterministic finite automata (DFA). Reasoning about the correctness
 8 | of this generated code can be difficult - if the derivation of the DFA in `Logos`
 9 | is correct, then this generated code will be correct and any mistakes in implementation
10 | would be caught given sufficient fuzz testing.
11 | 
12 | Use of unsafe code is the default as this typically provides the fastest parser.
13 | 
14 | ## Disabling Unsafe Code
15 | 
16 | However, for applications accepting untrusted input in a trusted context, this
17 | may not be a sufficient correctness justification.
18 | 
19 | For those applications which cannot tolerate unsafe code, the feature `forbid-unsafe`
20 | may be enabled. This replaces unchecked accesses in the `Logos` crate with safe,
21 | checked alternatives which will panic on out-of-bounds access rather than cause
22 | undefined behavior. Additionally, code generated by the macro will not use the
23 | unsafe keyword, so generated code may be used in a crates using the 
24 | `#![forbid(unsafe_code)]` attribute.
25 | 
26 | When the `forbid-unsafe` feature is added to a direct dependency on the `Logos` crate,
27 | [Feature Unification](https://doc.rust-lang.org/cargo/reference/features.html#feature-unification)
28 | ensures any transitive inclusion of `Logos` via other dependencies also have unsafe
29 | code disabled.
30 | 
31 | Generally, disabling unsafe code will result in a slower parser.
32 | 
33 | However making definitive statements around performance of safe-only code is difficult,
34 | as there are too many variables to consider between compiler optimizations,
35 | the specific grammar being parsed, and the target processor. The automated benchmarks
36 | of this crate show around a 10% slowdown in safe-only code at the time of this writing.
37 | 


--------------------------------------------------------------------------------
/examples/brainfuck.rs:
--------------------------------------------------------------------------------
  1 | //! Brainfuck interpreter written in Rust, using Logos.
  2 | //!
  3 | //! Usage:
  4 | //!     cargo run --example brainfuck <path/to/file>
  5 | //!
  6 | //! Example:
  7 | //!     cargo run --example brainfuck examples/hello_word.bf
  8 | //!
  9 | //! Brainfuck is an esoteric programming language that only
 10 | //! uses 8 single-character commands:
 11 | //! - '>';
 12 | //! - '<';
 13 | //! - '+';
 14 | //! - '-';
 15 | //! - '.';
 16 | //! - ',';
 17 | //! - '[';
 18 | //! - and ']'.
 19 | //!
 20 | //! Despite being very hard to use in practice, this makes
 21 | //! this language very simple to interpret. The following code
 22 | //! defines an [`execute`] function that runs Brainfuck code.
 23 | //!
 24 | //! Logos is used here to directly transform the code stream
 25 | //! into meaningful `Op` operations (or commands).
 26 | //! Errors, i.e., unknown tokens, are discarded using `filter_map`.
 27 | //!
 28 | //! More details can be found on Wikipedia:
 29 | //! <https://en.wikipedia.org/wiki/Brainfuck>.
 30 | //!
 31 | //! or on <http://brainfuck.org/>.
 32 | 
 33 | /* ANCHOR: all */
 34 | use logos::Logos;
 35 | use std::collections::HashMap;
 36 | use std::env;
 37 | use std::fs;
 38 | use std::io::{self, Read};
 39 | 
 40 | /* ANCHOR: tokens */
 41 | /// Each [`Op`] variant is a single character.
 42 | #[derive(Debug, Logos)]
 43 | enum Op {
 44 |     /// Increment pointer.
 45 |     #[token(">")]
 46 |     IncPointer,
 47 |     /// Decrement pointer.
 48 |     #[token("<")]
 49 |     DecPointer,
 50 |     /// Increment data at pointer.
 51 |     #[token("+")]
 52 |     IncData,
 53 |     /// Decrement data at pointer.
 54 |     #[token("-")]
 55 |     DecData,
 56 |     /// Output data at pointer.
 57 |     #[token(".")]
 58 |     OutData,
 59 |     /// Input (read) to data at pointer.
 60 |     #[token(",")]
 61 |     InpData,
 62 |     /// Conditionally jump to matching `']'`.
 63 |     #[token("[")]
 64 |     CondJumpForward,
 65 |     /// Conditionally jump to matching `'['`.
 66 |     #[token("]")]
 67 |     CondJumpBackward,
 68 | }
 69 | /* ANCHOR_END: tokens */
 70 | 
 71 | /// Print one byte to the terminal.
 72 | #[inline(always)]
 73 | fn print_byte(byte: u8) {
 74 |     print!("{}", byte as char);
 75 | }
 76 | 
 77 | /// Read one byte from the terminal.
 78 | #[inline(always)]
 79 | fn read_byte() -> u8 {
 80 |     let mut input = [0u8; 1];
 81 |     io::stdin()
 82 |         .read_exact(&mut input)
 83 |         .expect("An error occurred while reading byte!");
 84 |     input[0]
 85 | }
 86 | 
 87 | /// Execute Brainfuck code from a string slice.
 88 | pub fn execute(code: &str) {
 89 |     let operations: Vec<_> = Op::lexer(code).filter_map(|op| op.ok()).collect();
 90 |     let mut data = [0u8; 30_000]; // Minimum recommended size
 91 |     let mut pointer: usize = 0;
 92 |     let len = operations.len();
 93 | 
 94 |     // We pre-process matching jump commands, and we create
 95 |     // a mapping between them.
 96 |     let mut queue = Vec::new();
 97 |     let mut pairs = HashMap::new();
 98 |     let mut pairs_reverse = HashMap::new();
 99 | 
100 |     for (i, op) in operations.iter().enumerate() {
101 |         match op {
102 |             Op::CondJumpForward => queue.push(i),
103 |             Op::CondJumpBackward => {
104 |                 if let Some(start) = queue.pop() {
105 |                     pairs.insert(start, i);
106 |                     pairs_reverse.insert(i, start);
107 |                 } else {
108 |                     panic!(
109 |                         "Unexpected conditional backward jump at position {}, does not match any '['",
110 |                         i
111 |                     );
112 |                 }
113 |             }
114 |             _ => (),
115 |         }
116 |     }
117 | 
118 |     if !queue.is_empty() {
119 |         panic!("Unmatched conditional forward jump at positions {:?}, expecting a closing ']' for each of them", queue);
120 |     }
121 | 
122 |     /* ANCHOR: fsm */
123 |     let mut i: usize = 0;
124 |     // True program execution.
125 |     loop {
126 |         match operations[i] {
127 |             Op::IncPointer => pointer += 1,
128 |             Op::DecPointer => pointer -= 1,
129 |             Op::IncData => data[pointer] = data[pointer].wrapping_add(1),
130 |             Op::DecData => data[pointer] = data[pointer].wrapping_sub(1),
131 |             Op::OutData => print_byte(data[pointer]),
132 |             Op::InpData => data[pointer] = read_byte(),
133 |             Op::CondJumpForward => {
134 |                 if data[pointer] == 0 {
135 |                     // Skip until matching end.
136 |                     i = *pairs.get(&i).unwrap();
137 |                 }
138 |             }
139 |             Op::CondJumpBackward => {
140 |                 if data[pointer] != 0 {
141 |                     // Go back to matching start.
142 |                     i = *pairs_reverse.get(&i).unwrap();
143 |                 }
144 |             }
145 |         }
146 |         i += 1;
147 | 
148 |         if i >= len {
149 |             break;
150 |         }
151 |     }
152 |     /* ANCHOR_END: fsm */
153 | }
154 | 
155 | fn main() {
156 |     let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument"))
157 |         .expect("Failed to read file");
158 | 
159 |     execute(src.as_str());
160 | }
161 | /* ANCHOR_END: all */
162 | 


--------------------------------------------------------------------------------
/examples/calculator.rs:
--------------------------------------------------------------------------------
  1 | //! Simple calculator.
  2 | //!
  3 | //! Usage:
  4 | //!     cargo run --example calculator <arithmetic expression>
  5 | //!
  6 | //! Example:
  7 | //!     cargo run --example calculator '1 + 7 * (3 - 4) / 2'
  8 | //!
  9 | //! Following constructs are supported:
 10 | //! - integer literals: `0`, `1`, `15`, etc.
 11 | //! - unary operator: `-`
 12 | //! - binary operators: `+`, `-`, `*`, `/`
 13 | //! - parentheses: `(`, `)`
 14 | 
 15 | /* ANCHOR: all */
 16 | use std::env;
 17 | 
 18 | use chumsky::prelude::*;
 19 | use logos::Logos;
 20 | 
 21 | /* ANCHOR: tokens */
 22 | #[derive(Logos, Debug, PartialEq, Eq, Hash, Clone)]
 23 | #[logos(skip r"[ \t\n]+")]
 24 | #[logos(error = String)]
 25 | enum Token {
 26 |     #[token("+")]
 27 |     Plus,
 28 | 
 29 |     #[token("-")]
 30 |     Minus,
 31 | 
 32 |     #[token("*")]
 33 |     Multiply,
 34 | 
 35 |     #[token("/")]
 36 |     Divide,
 37 | 
 38 |     #[token("(")]
 39 |     LParen,
 40 | 
 41 |     #[token(")")]
 42 |     RParen,
 43 | 
 44 |     #[regex("[0-9]+", |lex| lex.slice().parse::<isize>().unwrap())]
 45 |     Integer(isize),
 46 | }
 47 | /* ANCHOR_END: tokens */
 48 | 
 49 | /* ANCHOR: ast */
 50 | #[derive(Debug)]
 51 | enum Expr {
 52 |     // Integer literal.
 53 |     Int(isize),
 54 | 
 55 |     // Unary minus.
 56 |     Neg(Box<Expr>),
 57 | 
 58 |     // Binary operators.
 59 |     Add(Box<Expr>, Box<Expr>),
 60 |     Sub(Box<Expr>, Box<Expr>),
 61 |     Mul(Box<Expr>, Box<Expr>),
 62 |     Div(Box<Expr>, Box<Expr>),
 63 | }
 64 | /* ANCHOR_END: ast */
 65 | 
 66 | /* ANCHOR: evaluator */
 67 | impl Expr {
 68 |     fn eval(&self) -> isize {
 69 |         match self {
 70 |             Expr::Int(n) => *n,
 71 |             Expr::Neg(rhs) => -rhs.eval(),
 72 |             Expr::Add(lhs, rhs) => lhs.eval() + rhs.eval(),
 73 |             Expr::Sub(lhs, rhs) => lhs.eval() - rhs.eval(),
 74 |             Expr::Mul(lhs, rhs) => lhs.eval() * rhs.eval(),
 75 |             Expr::Div(lhs, rhs) => lhs.eval() / rhs.eval(),
 76 |         }
 77 |     }
 78 | }
 79 | /* ANCHOR_END: evaluator */
 80 | 
 81 | #[allow(clippy::let_and_return)]
 82 | /* ANCHOR: parser */
 83 | fn parser() -> impl Parser<Token, Expr, Error = Simple<Token>> {
 84 |     recursive(|p| {
 85 |         let atom = {
 86 |             let parenthesized = p
 87 |                 .clone()
 88 |                 .delimited_by(just(Token::LParen), just(Token::RParen));
 89 | 
 90 |             let integer = select! {
 91 |                 Token::Integer(n) => Expr::Int(n),
 92 |             };
 93 | 
 94 |             parenthesized.or(integer)
 95 |         };
 96 | 
 97 |         let unary = just(Token::Minus)
 98 |             .repeated()
 99 |             .then(atom)
100 |             .foldr(|_op, rhs| Expr::Neg(Box::new(rhs)));
101 | 
102 |         let binary_1 = unary
103 |             .clone()
104 |             .then(
105 |                 just(Token::Multiply)
106 |                     .or(just(Token::Divide))
107 |                     .then(unary)
108 |                     .repeated(),
109 |             )
110 |             .foldl(|lhs, (op, rhs)| match op {
111 |                 Token::Multiply => Expr::Mul(Box::new(lhs), Box::new(rhs)),
112 |                 Token::Divide => Expr::Div(Box::new(lhs), Box::new(rhs)),
113 |                 _ => unreachable!(),
114 |             });
115 | 
116 |         let binary_2 = binary_1
117 |             .clone()
118 |             .then(
119 |                 just(Token::Plus)
120 |                     .or(just(Token::Minus))
121 |                     .then(binary_1)
122 |                     .repeated(),
123 |             )
124 |             .foldl(|lhs, (op, rhs)| match op {
125 |                 Token::Plus => Expr::Add(Box::new(lhs), Box::new(rhs)),
126 |                 Token::Minus => Expr::Sub(Box::new(lhs), Box::new(rhs)),
127 |                 _ => unreachable!(),
128 |             });
129 | 
130 |         binary_2
131 |     })
132 |     .then_ignore(end())
133 | }
134 | /* ANCHOR_END: parser */
135 | 
136 | /* ANCHOR: main */
137 | fn main() {
138 |     //reads the input expression from the command line
139 |     let input = env::args()
140 |         .nth(1)
141 |         .expect("Expected expression argument (e.g. `1 + 7 * (3 - 4) / 5`)");
142 | 
143 |     //creates a lexer instance from the input
144 |     let lexer = Token::lexer(&input);
145 | 
146 |     //splits the input into tokens, using the lexer
147 |     let mut tokens = vec![];
148 |     for (token, span) in lexer.spanned() {
149 |         match token {
150 |             Ok(token) => tokens.push(token),
151 |             Err(e) => {
152 |                 println!("lexer error at {:?}: {}", span, e);
153 |                 return;
154 |             }
155 |         }
156 |     }
157 | 
158 |     //parses the tokens to construct an AST
159 |     let ast = match parser().parse(tokens) {
160 |         Ok(expr) => {
161 |             println!("[AST]\n{:#?}", expr);
162 |             expr
163 |         }
164 |         Err(e) => {
165 |             println!("parse error: {:#?}", e);
166 |             return;
167 |         }
168 |     };
169 | 
170 |     //evaluates the AST to get the result
171 |     println!("\n[result]\n{}", ast.eval());
172 | }
173 | /* ANCHOR_END: main */
174 | /* ANCHOR_END: all */
175 | 


--------------------------------------------------------------------------------
/examples/custom_error.rs:
--------------------------------------------------------------------------------
 1 | //! ASCII tokens lexer with custom error type.
 2 | //!
 3 | //! Takes tabs-or-spaces separated words or u8 numbers,
 4 | //! only accepting ascii letters.
 5 | //!
 6 | //! Usage:
 7 | //!     cargo run --example custom_error
 8 | 
 9 | /* ANCHOR: all */
10 | use logos::Logos;
11 | 
12 | use std::num::ParseIntError;
13 | 
14 | #[derive(Default, Debug, Clone, PartialEq)]
15 | enum LexingError {
16 |     InvalidInteger(String),
17 |     #[default]
18 |     NonAsciiCharacter,
19 | }
20 | 
21 | /// Error type returned by calling `lex.slice().parse()` to u8.
22 | impl From<ParseIntError> for LexingError {
23 |     fn from(err: ParseIntError) -> Self {
24 |         use std::num::IntErrorKind::*;
25 |         match err.kind() {
26 |             PosOverflow | NegOverflow => LexingError::InvalidInteger("overflow error".to_owned()),
27 |             _ => LexingError::InvalidInteger("other error".to_owned()),
28 |         }
29 |     }
30 | }
31 | 
32 | #[derive(Debug, Logos, PartialEq)]
33 | #[logos(error = LexingError)]
34 | #[logos(skip r"[ \t]+")]
35 | enum Token {
36 |     #[regex(r"[a-zA-Z]+")]
37 |     Word,
38 |     #[regex(r"[0-9]+", |lex| lex.slice().parse())]
39 |     Integer(u8),
40 | }
41 | 
42 | fn main() {
43 |     // 256 overflows u8, since u8's max value is 255.
44 |     // 'é' is not a valid ascii letter.
45 |     let mut lex = Token::lexer("Hello 256 Jérome");
46 | 
47 |     assert_eq!(lex.next(), Some(Ok(Token::Word)));
48 |     assert_eq!(lex.slice(), "Hello");
49 | 
50 |     assert_eq!(
51 |         lex.next(),
52 |         Some(Err(LexingError::InvalidInteger(
53 |             "overflow error".to_owned()
54 |         )))
55 |     );
56 |     assert_eq!(lex.slice(), "256");
57 | 
58 |     assert_eq!(lex.next(), Some(Ok(Token::Word)));
59 |     assert_eq!(lex.slice(), "J");
60 | 
61 |     assert_eq!(lex.next(), Some(Err(LexingError::NonAsciiCharacter)));
62 |     assert_eq!(lex.slice(), "é");
63 | 
64 |     assert_eq!(lex.next(), Some(Ok(Token::Word)));
65 |     assert_eq!(lex.slice(), "rome");
66 | 
67 |     assert_eq!(lex.next(), None);
68 | }
69 | /* ANCHOR_END: all */
70 | 


--------------------------------------------------------------------------------
/examples/example.json:
--------------------------------------------------------------------------------
 1 | [
 2 | 
 3 |   {
 4 | "selftext" : "Hey, folks!\n\nWhile /r/Funny has always had a strong preference for original content – it's right there in Rule 3, after all – we've never required users in good standing to post only things that they personally created. However, we *have* frequently taken steps to cut down on low-effort, low-quality submissions (like memes, screenshots of social media, and so on)... and although we're a little bit late to the game with this, we're going to take another such step:\n\n**Henceforth, AI-generated content of any kind may not be posted in /r/Funny.**\n\nWe know, we know. \"Welcome to 2022,\" right? We're well aware that the novelty of things like Midjourney, ChatGPT, Bing, Rutabaga, Bard, DALL-E, StorFisa, DeepAI, and other such programs is quickly wearing off, and we've seen the growing disillusionment, disapproval, and general annoyance that folks have been voicing... but in our defense, we made up two of those services, so you can't *really* be upset about people using them.\n\nAnyway, this change was prompted by a few different factors (in addition to addressing users' concerns), but one of the most prominent is the fact that AI-generated content requires almost no involvement on the part of a given submitter: While a glorified algorithm may spit out some images, the *user's* only contribution – assuming that they didn't design, code, and train said algorithm, of course – is a short prompt. That requires even less effort than \"making\" memes or taking screenshots of social media does, so if the goal is to encourage high-quality, original content... well, you see the obvious conclusion.\n\nThe TL;DR is that we want to keep /r/Funny as pleasant as possible for contributors, participants, and lurkers alike, so until such time as *real* AIs start registering Reddit accounts (which our counterparts from the future¹ say will happen on September 12th, 2097), AI-generated content will not be allowed.\n\n------\n\n^¹ ^(Yes, we have a time-machine, and no, it isn't just a Magic 8-Ball that we duct-taped to a frog.)",
 5 | 
 6 |     "WHO": "Joe",
 7 |     "WEEK": [
 8 |       {
 9 |         "NUMBER": 3,
10 |         "EXPENSE": [
11 |           {
12 |             "WHAT": "Beer",
13 |             "AMOUNT": 18.00
14 |           },
15 |           {
16 |             "WHAT": "Food",
17 |             "AMOUNT": 12.00
18 |           },
19 |           {
20 |             "WHAT": "Food",
21 |             "AMOUNT": 19.00
22 |           },
23 |           {
24 |             "WHAT": "Car",
25 |             "AMOUNT": 20.00
26 |           }
27 |         ]
28 |       }
29 |     ]
30 |   },
31 | {
32 |     "updated_at": "2015-01-01T15:00:06Z",
33 |     "glossary": {
34 |         "title": "example glossary",
35 | 		"GlossDiv": {
36 |             "title": "S",
37 | 			"GlossList": {
38 |                 "GlossEntry": {
39 |                     "ID": "SGML",
40 | 					"SortAs": "SGML",
41 | 					"GlossTerm": "Standard Generalized Markup Language",
42 | 					"Acronym": "SGML",
43 | 					"Abbrev": "ISO 8879:1986",
44 | 					"GlossDef": {
45 |                         "para": "A meta-markup language, used to create markup languages such as DocBook.",
46 | 						"GlossSeeAlso": ["GML", "XML"]
47 |                     },
48 | 					"GlossSee": "markup"
49 |                 }
50 |             }
51 |         }
52 |     }
53 | }
54 | ]
55 | 


--------------------------------------------------------------------------------
/examples/extras.rs:
--------------------------------------------------------------------------------
 1 | //! Print line and column positions for each word in a file.
 2 | //!
 3 | //! Usage:
 4 | //!     cargo run --example extras <path/to/file>
 5 | //!
 6 | //! Example:
 7 | //!     cargo run --example extras Cargo.toml
 8 | //!
 9 | //! This is a small example on how to use
10 | //! [`Extras`](https://docs.rs/logos/latest/logos/trait.Logos.html#associatedtype.Extras)
11 | //! to convey some (mutable) internal state from token to token.
12 | //!
13 | //! Here, the extras will be a tuple with the following fields:
14 | //!
15 | //! + 0. the line number;
16 | //! + 1. the char index of the current line.
17 | //!
18 | //! From then, one can easily compute the column number of some token by computing:
19 | //!
20 | //! ```rust,no_run,no_playground
21 | //! fn get_column(lex: &Lexer<Token>) -> usize {
22 | //!     lex.span().start - lex.extras.1
23 | //! }
24 | //! ```
25 | 
26 | /* ANCHOR: all */
27 | use logos::{Lexer, Logos, Skip};
28 | use std::env;
29 | use std::fs;
30 | 
31 | /* ANCHOR: callbacks */
32 | /// Update the line count and the char index.
33 | fn newline_callback(lex: &mut Lexer<Token>) -> Skip {
34 |     lex.extras.0 += 1;
35 |     lex.extras.1 = lex.span().end;
36 |     Skip
37 | }
38 | 
39 | /// Compute the line and column position for the current word.
40 | fn word_callback(lex: &mut Lexer<Token>) -> (usize, usize) {
41 |     let line = lex.extras.0;
42 |     let column = lex.span().start - lex.extras.1;
43 | 
44 |     (line, column)
45 | }
46 | /* ANCHOR_END: callbacks */
47 | 
48 | /* ANCHOR: tokens */
49 | /// Simple tokens to retrieve words and their location.
50 | #[derive(Debug, Logos)]
51 | #[logos(extras = (usize, usize))]
52 | enum Token {
53 |     #[regex(r"\n", newline_callback)]
54 |     Newline,
55 | 
56 |     #[regex(r"\w+", word_callback)]
57 |     Word((usize, usize)),
58 | }
59 | /* ANCHOR_END: tokens */
60 | 
61 | fn main() {
62 |     let src = fs::read_to_string(env::args().nth(1).expect("Expected file argument"))
63 |         .expect("Failed to read file");
64 | 
65 |     let mut lex = Token::lexer(src.as_str());
66 | 
67 |     while let Some(token) = lex.next() {
68 |         if let Ok(Token::Word((line, column))) = token {
69 |             println!("Word '{}' found at ({}, {})", lex.slice(), line, column);
70 |         }
71 |     }
72 | }
73 | /* ANCHOR_END: all */
74 | 


--------------------------------------------------------------------------------
/examples/hello_world.bf:
--------------------------------------------------------------------------------
 1 | [ This program prints "Hello World!" and a newline to the screen, its
 2 |   length is 106 active command characters. [It is not the shortest.]
 3 | 
 4 |   This loop is an "initial comment loop", a simple way of adding a comment
 5 |   to a BF program such that you don't have to worry about any command
 6 |   characters. Any ".", ",", "+", "-", "<" and ">" characters are simply
 7 |   ignored, the "[" and "]" characters just have to be balanced. This
 8 |   loop and the commands it contains are ignored because the current cell
 9 |   defaults to a value of 0; the 0 value causes this loop to be skipped.
10 | ]
11 | ++++++++               Set Cell #0 to 8
12 | [
13 |     >++++               Add 4 to Cell #1; this will always set Cell #1 to 4
14 |     [                   as the cell will be cleared by the loop
15 |         >++             Add 2 to Cell #2
16 |         >+++            Add 3 to Cell #3
17 |         >+++            Add 3 to Cell #4
18 |         >+              Add 1 to Cell #5
19 |         <<<<-           Decrement the loop counter in Cell #1
20 |     ]                   Loop until Cell #1 is zero; number of iterations is 4
21 |     >+                  Add 1 to Cell #2
22 |     >+                  Add 1 to Cell #3
23 |     >-                  Subtract 1 from Cell #4
24 |     >>+                 Add 1 to Cell #6
25 |     [<]                 Move back to the first zero cell you find; this will
26 |                         be Cell #1 which was cleared by the previous loop
27 |     <-                  Decrement the loop Counter in Cell #0
28 | ]                       Loop until Cell #0 is zero; number of iterations is 8
29 | 
30 | The result of this is:
31 | Cell no :   0   1   2   3   4   5   6
32 | Contents:   0   0  72 104  88  32   8
33 | Pointer :   ^
34 | 
35 | >>.                     Cell #2 has value 72 which is 'H'
36 | >---.                   Subtract 3 from Cell #3 to get 101 which is 'e'
37 | +++++++..+++.           Likewise for 'llo' from Cell #3
38 | >>.                     Cell #5 is 32 for the space
39 | <-.                     Subtract 1 from Cell #4 for 87 to give a 'W'
40 | <.                      Cell #3 was set to 'o' from the end of 'Hello'
41 | +++.------.--------.    Cell #3 for 'rl' and 'd'
42 | >>+.                    Add 1 to Cell #5 gives us an exclamation point
43 | >++.                    And finally a newline from Cell #6
44 | 


--------------------------------------------------------------------------------
/examples/string-interpolation.rs:
--------------------------------------------------------------------------------
  1 | /* ANCHOR: all */
  2 | use std::collections::HashMap;
  3 | 
  4 | use logos::{Lexer, Logos};
  5 | 
  6 | /* ANCHOR: lexers */
  7 | type SymbolTable = HashMap<String, String>;
  8 | 
  9 | #[derive(Logos, Debug, PartialEq, Clone)]
 10 | #[logos(skip r"\s+")]
 11 | #[logos(extras = SymbolTable)]
 12 | enum VariableDefinitionContext {
 13 |     #[regex(r"[[:alpha:]][[:alnum:]]*", variable_definition)]
 14 |     Id((String /* variable name */, String /* value */)),
 15 |     #[token("=")]
 16 |     Equals,
 17 |     #[token("'")]
 18 |     Quote,
 19 | }
 20 | 
 21 | #[derive(Logos, Debug, PartialEq, Clone)]
 22 | #[logos(extras = SymbolTable)]
 23 | enum StringContext {
 24 |     #[token("'")]
 25 |     Quote,
 26 |     #[regex("[^'$]+")]
 27 |     Content,
 28 |     #[token("${", evaluate_interpolation)]
 29 |     InterpolationStart(String /* evaluated value of the interpolation */),
 30 |     #[token("$")]
 31 |     DollarSign,
 32 | }
 33 | 
 34 | #[derive(Logos, Debug, PartialEq, Clone)]
 35 | #[logos(skip r"\s+")]
 36 | #[logos(extras = SymbolTable)]
 37 | enum StringInterpolationContext {
 38 |     #[regex(r"[[:alpha:]][[:alnum:]]*", get_variable_value)]
 39 |     Id(String /* value for the given id */),
 40 |     #[token("'")]
 41 |     Quote,
 42 |     #[token("}")]
 43 |     InterpolationEnd,
 44 | }
 45 | /* ANCHOR_END: lexers */
 46 | 
 47 | /* ANCHOR: variable_definition */
 48 | fn get_string_content(lex: &mut Lexer<StringContext>) -> String {
 49 |     let mut s = String::new();
 50 |     while let Some(Ok(token)) = lex.next() {
 51 |         match token {
 52 |             StringContext::Content => s.push_str(lex.slice()),
 53 |             StringContext::DollarSign => s.push_str("$"),
 54 |             StringContext::InterpolationStart(value) => s.push_str(&value),
 55 |             StringContext::Quote => break,
 56 |         }
 57 |     }
 58 |     s
 59 | }
 60 | 
 61 | fn variable_definition(lex: &mut Lexer<VariableDefinitionContext>) -> Option<(String, String)> {
 62 |     let id = lex.slice().to_string();
 63 |     if let Some(Ok(VariableDefinitionContext::Equals)) = lex.next() {
 64 |         if let Some(Ok(VariableDefinitionContext::Quote)) = lex.next() {
 65 |             let mut lex2 = lex.clone().morph::<StringContext>();
 66 |             let value = get_string_content(&mut lex2);
 67 |             *lex = lex2.morph();
 68 |             lex.extras.insert(id.clone(), value.clone());
 69 |             return Some((id, value));
 70 |         }
 71 |     }
 72 |     None
 73 | }
 74 | /* ANCHOR_END: variable_definition */
 75 | 
 76 | /* ANCHOR: evaluate_interpolation */
 77 | fn evaluate_interpolation(lex: &mut Lexer<StringContext>) -> Option<String> {
 78 |     let mut lex2 = lex.clone().morph::<StringInterpolationContext>();
 79 |     let mut interpolation = String::new();
 80 |     while let Some(result) = lex2.next() {
 81 |         match result {
 82 |             Ok(token) => match token {
 83 |                 StringInterpolationContext::Id(value) => interpolation.push_str(&value),
 84 |                 StringInterpolationContext::Quote => {
 85 |                     *lex = lex2.morph();
 86 |                     interpolation.push_str(&get_string_content(lex));
 87 |                     lex2 = lex.clone().morph();
 88 |                 }
 89 |                 StringInterpolationContext::InterpolationEnd => break,
 90 |             },
 91 |             Err(()) => panic!("Interpolation error"),
 92 |         }
 93 |     }
 94 |     *lex = lex2.morph();
 95 |     Some(interpolation)
 96 | }
 97 | /* ANCHOR_END: evaluate_interpolation */
 98 | 
 99 | /* ANCHOR: get_variable_value */
100 | fn get_variable_value(lex: &mut Lexer<StringInterpolationContext>) -> Option<String> {
101 |     if let Some(value) = lex.extras.get(lex.slice()) {
102 |         return Some(value.clone());
103 |     }
104 |     None
105 | }
106 | /* ANCHOR_END: get_variable_value */
107 | 
108 | /* ANCHOR: main */
109 | fn test_variable_definition(
110 |     expeected_id: &str,
111 |     expeected_value: &str,
112 |     token: Option<Result<VariableDefinitionContext, ()>>,
113 | ) {
114 |     if let Some(Ok(VariableDefinitionContext::Id((id, value)))) = token {
115 |         assert_eq!(id, expeected_id);
116 |         assert_eq!(value, expeected_value);
117 |     } else {
118 |         panic!("Expected key: {} not found", expeected_id);
119 |     }
120 | }
121 | 
122 | fn main() {
123 |     let mut lex = VariableDefinitionContext::lexer(
124 |         "\
125 |         name = 'Mark'\n\
126 |         greeting = 'Hi ${name}!'\n\
127 |         surname = 'Scott'\n\
128 |         greeting2 = 'Hi ${name ' ' surname}!'\n\
129 |         greeting3 = 'Hi ${name ' ${surname}!'}!'\n\
130 |         ",
131 |     );
132 |     test_variable_definition("name", "Mark", lex.next());
133 |     test_variable_definition("greeting", "Hi Mark!", lex.next());
134 |     test_variable_definition("surname", "Scott", lex.next());
135 |     test_variable_definition("greeting2", "Hi Mark Scott!", lex.next());
136 |     test_variable_definition("greeting3", "Hi Mark Scott!!", lex.next());
137 | }
138 | /* ANCHOR_END: main */
139 | /* ANCHOR_END: all */
140 | 


--------------------------------------------------------------------------------
/fuzz/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | edition.workspace = true
 3 | name = "logos-fuzz"
 4 | publish = false
 5 | rust-version.workspace = true
 6 | 
 7 | [dependencies]
 8 | afl = "0.15"
 9 | arbitrary = "1.3"
10 | logos-codegen = { path = "../logos-codegen", features = ["fuzzing"] }
11 | 
12 | [package.metadata.release]
13 | shared-version = true
14 | 


--------------------------------------------------------------------------------
/fuzz/in/literal:
--------------------------------------------------------------------------------
1 | literal


--------------------------------------------------------------------------------
/fuzz/in/regex:
--------------------------------------------------------------------------------
1 | a+b[cd-h]+?


--------------------------------------------------------------------------------
/fuzz/src/main.rs:
--------------------------------------------------------------------------------
 1 | use afl::fuzz;
 2 | use logos_codegen::{
 3 |     graph::{Graph, Node},
 4 |     mir::Mir,
 5 | };
 6 | 
 7 | fn main() {
 8 |     fuzz!(|regex: String| {
 9 |         let mut graph = Graph::new();
10 | 
11 |         if let Ok(mir) = Mir::utf8(&regex) {
12 |             let leaf = graph.push(Node::Leaf("LEAF"));
13 |             let _ = graph.regex(mir, leaf);
14 |         }
15 |     });
16 | }
17 | 


--------------------------------------------------------------------------------
/logos-cli/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [dependencies]
 2 | anyhow = "1.0.57"
 3 | clap = {version = "3.1.18", features = ["derive"]}
 4 | fs-err = "2.7.0"
 5 | logos-codegen = {version = "0.15.0", path = "../logos-codegen"}
 6 | proc-macro2 = "1.0.39"
 7 | 
 8 | [dev-dependencies]
 9 | assert_cmd = "2.0.4"
10 | assert_fs = "1.0.7"
11 | predicates = "2.1.1"
12 | 
13 | [features]
14 | # Enables debug messages
15 | debug = ["logos-codegen/debug"]
16 | 
17 | [package]
18 | name = "logos-cli"
19 | authors.workspace = true
20 | categories.workspace = true
21 | description.workspace = true
22 | edition.workspace = true
23 | homepage.workspace = true
24 | keywords.workspace = true
25 | license.workspace = true
26 | readme.workspace = true
27 | repository.workspace = true
28 | rust-version.workspace = true
29 | version.workspace = true
30 | 
31 | [package.metadata.release]
32 | shared-version = true
33 | 


--------------------------------------------------------------------------------
/logos-cli/LICENSE-APACHE:
--------------------------------------------------------------------------------
1 | ../LICENSE-APACHE


--------------------------------------------------------------------------------
/logos-cli/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | ../LICENSE-MIT


--------------------------------------------------------------------------------
/logos-cli/src/main.rs:
--------------------------------------------------------------------------------
 1 | use std::{
 2 |     fmt::Write,
 3 |     io,
 4 |     path::PathBuf,
 5 |     process::{Command, Stdio},
 6 | };
 7 | 
 8 | use anyhow::{Context, Result};
 9 | use clap::Parser;
10 | use fs_err as fs;
11 | use proc_macro2::{LexError, TokenStream};
12 | 
13 | /// Logos as a CLI!
14 | #[derive(Parser)]
15 | #[clap(author, version, about, long_about = None)]
16 | pub struct Args {
17 |     /// Input file to process
18 |     #[clap(parse(from_os_str))]
19 |     input: PathBuf,
20 |     /// Path to write output. By default output is printed to stdout.
21 |     #[clap(long, short, parse(from_os_str))]
22 |     output: Option<PathBuf>,
23 |     /// Checks whether the output file is up-to-date instead of writing to it. Requires --output to be specified.
24 |     #[clap(long, requires = "output")]
25 |     check: bool,
26 |     /// Invokes `rustfmt` on the generated code. `rustfmt` must be in $PATH.
27 |     #[clap(long)]
28 |     format: bool,
29 | }
30 | 
31 | pub fn main() -> Result<()> {
32 |     let args = Args::parse();
33 | 
34 |     let input = fs::read_to_string(args.input)?;
35 |     let mut output = codegen(input).context("failed to run rustfmt")?;
36 | 
37 |     if args.format {
38 |         output = rustfmt(output)?;
39 |     }
40 | 
41 |     if let Some(output_path) = args.output {
42 |         let changed = match fs::read_to_string(&output_path) {
43 |             Ok(existing_output) => !eq_ignore_newlines(&existing_output, &output),
44 |             Err(err) if err.kind() == io::ErrorKind::NotFound => true,
45 |             Err(err) => return Err(err.into()),
46 |         };
47 | 
48 |         if !changed {
49 |             Ok(())
50 |         } else if args.check {
51 |             Err(anyhow::format_err!(
52 |                 "contents of {} differed from generated code",
53 |                 output_path.display()
54 |             ))
55 |         } else {
56 |             fs::write(output_path, output)?;
57 |             Ok(())
58 |         }
59 |     } else {
60 |         println!("{}", output);
61 |         Ok(())
62 |     }
63 | }
64 | 
65 | fn codegen(input: String) -> Result<String> {
66 |     let input_tokens: TokenStream = input
67 |         .parse()
68 |         .map_err(|err: LexError| anyhow::Error::msg(err.to_string()))
69 |         .context("failed to parse input as rust code")?;
70 | 
71 |     let mut output = String::new();
72 |     write!(
73 |         output,
74 |         "{}",
75 |         logos_codegen::strip_attributes(input_tokens.clone())
76 |     )?;
77 |     write!(output, "{}", logos_codegen::generate(input_tokens))?;
78 |     Ok(output)
79 | }
80 | 
81 | fn rustfmt(input: String) -> Result<String> {
82 |     let mut command = Command::new("rustfmt")
83 |         .stdin(Stdio::piped())
84 |         .stderr(Stdio::inherit())
85 |         .stdout(Stdio::piped())
86 |         .spawn()?;
87 |     io::Write::write_all(&mut command.stdin.take().unwrap(), input.as_bytes())?;
88 |     let output = command.wait_with_output()?;
89 |     if !output.status.success() {
90 |         anyhow::bail!("rustfmt returned unsuccessful exit code");
91 |     }
92 | 
93 |     String::from_utf8(output.stdout).context("failed to parse rustfmt output as utf-8")
94 | }
95 | 
96 | fn eq_ignore_newlines(lhs: &str, rhs: &str) -> bool {
97 |     lhs.lines().eq(rhs.lines())
98 | }
99 | 


--------------------------------------------------------------------------------
/logos-cli/tests/data/fmt_output.rs:
--------------------------------------------------------------------------------
 1 | #[derive(Debug, Clone, Copy, PartialEq)]
 2 | enum Token {
 3 |     Letter,
 4 | }
 5 | impl<'s> ::logos::Logos<'s> for Token {
 6 |     type Error = ();
 7 |     type Extras = ();
 8 |     type Source = str;
 9 |     fn lex(lex: &mut ::logos::Lexer<'s, Self>) {
10 |         use logos::internal::{CallbackResult, LexerInternal};
11 |         type Lexer<'s> = ::logos::Lexer<'s, Token>;
12 |         fn _end<'s>(lex: &mut Lexer<'s>) {
13 |             lex.end()
14 |         }
15 |         fn _error<'s>(lex: &mut Lexer<'s>) {
16 |             lex.bump_unchecked(1);
17 |             lex.error();
18 |         }
19 |         macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; }
20 |         #[inline]
21 |         fn goto1_x<'s>(lex: &mut Lexer<'s>) {
22 |             lex.set(Ok(Token::Letter));
23 |         }
24 |         #[inline]
25 |         fn goto3_at1_with3<'s>(lex: &mut Lexer<'s>) {
26 |             match lex.read_at::<&[u8; 2usize]>(1usize) {
27 |                 Some(b"-z") => {
28 |                     lex.bump_unchecked(3usize);
29 |                     goto1_x(lex)
30 |                 }
31 |                 _ => _error(lex),
32 |             }
33 |         }
34 |         #[inline]
35 |         fn goto4<'s>(lex: &mut Lexer<'s>) {
36 |             let arr = match lex.read::<&[u8; 3usize]>() {
37 |                 Some(arr) => arr,
38 |                 None => return _end(lex),
39 |             };
40 |             match arr[0] {
41 |                 b'a' => goto3_at1_with3(lex),
42 |                 _ => _error(lex),
43 |             }
44 |         }
45 |         goto4(lex)
46 |     }
47 | }
48 | 


--------------------------------------------------------------------------------
/logos-cli/tests/data/input.rs:
--------------------------------------------------------------------------------
1 | #[derive(Logos, Debug, Clone, Copy, PartialEq)]
2 | enum Token {
3 |     #[regex("a-z")]
4 |     Letter,
5 | }
6 | 


--------------------------------------------------------------------------------
/logos-cli/tests/data/output.rs:
--------------------------------------------------------------------------------
1 | # [derive (Debug , Clone , Copy , PartialEq)] enum Token { Letter , }impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = str ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Letter)) ; } # [inline] fn goto3_at1_with3 < 's > (lex : & mut Lexer < 's >) { match lex . read_at :: < & [u8 ; 2usize] > (1usize) { Some (b"-z") => { lex . bump_unchecked (3usize) ; goto1_x (lex) } , _ => _error (lex) , } } # [inline] fn goto4 < 's > (lex : & mut Lexer < 's >) { let arr = match lex . read :: < & [u8 ; 3usize] > () { Some (arr) => arr , None => return _end (lex) , } ; match arr [0] { b'a' => goto3_at1_with3 (lex) , _ => _error (lex) , } } goto4 (lex) } }


--------------------------------------------------------------------------------
/logos-cli/tests/tests.rs:
--------------------------------------------------------------------------------
 1 | use std::path::Path;
 2 | 
 3 | use assert_cmd::Command;
 4 | use assert_fs::{assert::PathAssert, fixture::FileWriteStr, NamedTempFile};
 5 | use predicates::prelude::*;
 6 | 
 7 | const INPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/input.rs");
 8 | const OUTPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/output.rs");
 9 | const FMT_OUTPUT_FILE: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/tests/data/fmt_output.rs");
10 | 
11 | #[test]
12 | fn test_codegen() {
13 |     let tempfile = NamedTempFile::new("output.gen.rs").unwrap();
14 | 
15 |     let mut cmd = Command::cargo_bin("logos-cli").unwrap();
16 |     cmd.arg(INPUT_FILE)
17 |         .arg("--output")
18 |         .arg(tempfile.path())
19 |         .assert()
20 |         .success();
21 | 
22 |     tempfile.assert(normalize_newlines(OUTPUT_FILE));
23 | }
24 | 
25 | #[test]
26 | fn test_codegen_check() {
27 |     Command::cargo_bin("logos-cli")
28 |         .unwrap()
29 |         .arg(INPUT_FILE)
30 |         .arg("--check")
31 |         .arg("--output")
32 |         .arg(OUTPUT_FILE)
33 |         .assert()
34 |         .success();
35 | }
36 | 
37 | #[test]
38 | fn test_codegen_check_format() {
39 |     Command::cargo_bin("logos-cli")
40 |         .unwrap()
41 |         .arg(INPUT_FILE)
42 |         .arg("--format")
43 |         .arg("--check")
44 |         .arg("--output")
45 |         .arg(FMT_OUTPUT_FILE)
46 |         .assert()
47 |         .success();
48 | }
49 | 
50 | #[test]
51 | fn test_codegen_fail_check() {
52 |     let tempfile = NamedTempFile::new("output.gen.rs").unwrap();
53 | 
54 |     tempfile.write_str("some random data").unwrap();
55 | 
56 |     Command::cargo_bin("logos-cli")
57 |         .unwrap()
58 |         .arg(INPUT_FILE)
59 |         .arg("--check")
60 |         .arg("--output")
61 |         .arg(tempfile.path())
62 |         .assert()
63 |         .failure();
64 | }
65 | 
66 | #[test]
67 | fn test_codegen_format() {
68 |     let tempfile = NamedTempFile::new("output.gen.rs").unwrap();
69 | 
70 |     let mut cmd = Command::cargo_bin("logos-cli").unwrap();
71 |     cmd.arg(INPUT_FILE)
72 |         .arg("--format")
73 |         .arg("--output")
74 |         .arg(tempfile.path())
75 |         .assert()
76 |         .success();
77 | 
78 |     tempfile.assert(normalize_newlines(FMT_OUTPUT_FILE));
79 | }
80 | 
81 | fn normalize_newlines(s: impl AsRef<Path>) -> impl Predicate<str> {
82 |     predicates::str::diff(fs_err::read_to_string(s).unwrap().replace("\r\n", "\n")).normalize()
83 | }
84 | 


--------------------------------------------------------------------------------
/logos-codegen/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [dependencies]
 2 | beef = "0.5.0"
 3 | fnv = "1.0.6"
 4 | lazy_static = "1.4.0"
 5 | proc-macro2 = "1.0.9"
 6 | quote = "1.0.3"
 7 | regex-syntax = "0.8.2"
 8 | syn = { version = "2.0.13", features = ["full"] }
 9 | 
10 | [dev-dependencies]
11 | pretty_assertions = "1.4.0"
12 | rstest = "0.23.0"
13 | 
14 | [build-dependencies]
15 | rustc_version = "0.4.1"
16 | 
17 | [features]
18 | # Enables debug messages
19 | debug = []
20 | # Exports out internal methods for fuzzing
21 | fuzzing = []
22 | # Don't use or generate unsafe code
23 | forbid_unsafe = []
24 | 
25 | [lib]
26 | bench = false
27 | 
28 | [package]
29 | name = "logos-codegen"
30 | authors.workspace = true
31 | categories.workspace = true
32 | description.workspace = true
33 | edition.workspace = true
34 | homepage.workspace = true
35 | keywords.workspace = true
36 | license.workspace = true
37 | readme.workspace = true
38 | repository.workspace = true
39 | rust-version.workspace = true
40 | version.workspace = true
41 | 
42 | [package.metadata.release]
43 | shared-version = true
44 | 


--------------------------------------------------------------------------------
/logos-codegen/LICENSE-APACHE:
--------------------------------------------------------------------------------
1 | ../LICENSE-APACHE


--------------------------------------------------------------------------------
/logos-codegen/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | ../LICENSE-MIT


--------------------------------------------------------------------------------
/logos-codegen/build.rs:
--------------------------------------------------------------------------------
 1 | use rustc_version::{version_meta, Version};
 2 | 
 3 | fn main() {
 4 |     let version_meta = version_meta().expect("Could not get Rust version");
 5 | 
 6 |     let rustc_version = version_meta.semver;
 7 |     let trimmed_rustc_version = Version::new(
 8 |         rustc_version.major,
 9 |         rustc_version.minor,
10 |         rustc_version.patch,
11 |     );
12 | 
13 |     // Add cfg flag for Rust >= 1.82
14 |     // Required for precise capturing in edition 2024
15 |     // Due to changes in lifetime and type capture behavior for impl trait
16 |     // see: https://github.com/maciejhirsz/logos/issues/434, https://github.com/rust-lang/rfcs/pull/3498
17 |     println!("cargo:rustc-check-cfg=cfg(rust_1_82)");
18 |     if trimmed_rustc_version >= Version::new(1, 82, 0) {
19 |         println!("cargo:rustc-cfg=rust_1_82");
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/logos-codegen/src/error.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt;
  2 | 
  3 | use beef::lean::Cow;
  4 | use proc_macro2::{Span, TokenStream};
  5 | use quote::quote;
  6 | use quote::{quote_spanned, ToTokens, TokenStreamExt};
  7 | 
  8 | pub type Result<T> = std::result::Result<T, Error>;
  9 | 
 10 | #[derive(Default)]
 11 | pub struct Errors {
 12 |     collected: Vec<SpannedError>,
 13 | }
 14 | 
 15 | impl Errors {
 16 |     pub fn err<M>(&mut self, message: M, span: Span) -> &mut Self
 17 |     where
 18 |         M: Into<Cow<'static, str>>,
 19 |     {
 20 |         self.collected.push(SpannedError {
 21 |             message: message.into(),
 22 |             span,
 23 |         });
 24 | 
 25 |         self
 26 |     }
 27 | 
 28 |     pub fn render(self) -> Option<TokenStream> {
 29 |         let errors = self.collected;
 30 | 
 31 |         match errors.len() {
 32 |             0 => None,
 33 |             _ => Some(quote! {
 34 |                 fn _logos_derive_compile_errors() {
 35 |                     #(#errors)*
 36 |                 }
 37 |             }),
 38 |         }
 39 |     }
 40 | }
 41 | 
 42 | pub struct Error(Cow<'static, str>);
 43 | 
 44 | #[derive(Debug)]
 45 | pub struct SpannedError {
 46 |     message: Cow<'static, str>,
 47 |     span: Span,
 48 | }
 49 | 
 50 | impl Error {
 51 |     pub fn new<M>(message: M) -> Self
 52 |     where
 53 |         M: Into<Cow<'static, str>>,
 54 |     {
 55 |         Error(message.into())
 56 |     }
 57 | 
 58 |     pub fn span(self, span: Span) -> SpannedError {
 59 |         SpannedError {
 60 |             message: self.0,
 61 |             span,
 62 |         }
 63 |     }
 64 | }
 65 | 
 66 | impl fmt::Display for Error {
 67 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 68 |         self.0.fmt(f)
 69 |     }
 70 | }
 71 | 
 72 | impl fmt::Debug for Error {
 73 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 74 |         fmt::Display::fmt(self, f)
 75 |     }
 76 | }
 77 | 
 78 | impl From<regex_syntax::Error> for Error {
 79 |     fn from(err: regex_syntax::Error) -> Error {
 80 |         Error(err.to_string().into())
 81 |     }
 82 | }
 83 | 
 84 | impl From<&'static str> for Error {
 85 |     fn from(err: &'static str) -> Error {
 86 |         Error(err.into())
 87 |     }
 88 | }
 89 | 
 90 | impl From<String> for Error {
 91 |     fn from(err: String) -> Error {
 92 |         Error(err.into())
 93 |     }
 94 | }
 95 | 
 96 | impl From<Error> for Cow<'static, str> {
 97 |     fn from(err: Error) -> Self {
 98 |         err.0
 99 |     }
100 | }
101 | 
102 | impl ToTokens for SpannedError {
103 |     fn to_tokens(&self, tokens: &mut TokenStream) {
104 |         let message = &*self.message;
105 | 
106 |         tokens.append_all(quote_spanned!(self.span => {
107 |             compile_error!(#message)
108 |         }))
109 |     }
110 | }
111 | 


--------------------------------------------------------------------------------
/logos-codegen/src/generator/context.rs:
--------------------------------------------------------------------------------
  1 | use proc_macro2::TokenStream;
  2 | use quote::quote;
  3 | 
  4 | use crate::generator::Generator;
  5 | use crate::graph::NodeId;
  6 | 
  7 | /// This struct keeps track of bytes available to be read without
  8 | /// bounds checking across the tree.
  9 | ///
 10 | /// For example, a branch that matches 4 bytes followed by a fork
 11 | /// with smallest branch containing of 2 bytes can do a bounds check
 12 | /// for 6 bytes ahead, and leave the remaining 2 byte array (fixed size)
 13 | /// to be handled by the fork, avoiding bound checks there.
 14 | #[derive(Default, Clone, Copy, PartialEq, Eq, Hash, Debug)]
 15 | pub struct Context {
 16 |     /// Amount of bytes that haven't been bumped yet but should
 17 |     /// before a new read is performed
 18 |     at: usize,
 19 |     /// Number of bytes available without bound checks
 20 |     available: usize,
 21 |     /// Whether or not the Lexer has been bumped at least by 1 byte
 22 |     bumped: bool,
 23 |     /// Node to backtrack to to in case an explicit match has failed.
 24 |     /// If `None` will instead produce an error token.
 25 |     backtrack: Option<NodeId>,
 26 | }
 27 | 
 28 | impl Context {
 29 |     pub fn can_backtrack(&self) -> bool {
 30 |         self.backtrack.is_some()
 31 |     }
 32 | 
 33 |     pub fn switch(&mut self, miss: Option<NodeId>) -> Option<TokenStream> {
 34 |         self.backtrack = Some(miss?);
 35 |         self.bump()
 36 |     }
 37 | 
 38 |     pub const fn advance(self, n: usize) -> Self {
 39 |         Context {
 40 |             at: self.at + n,
 41 |             ..self
 42 |         }
 43 |     }
 44 | 
 45 |     pub fn bump(&mut self) -> Option<TokenStream> {
 46 |         match self.at {
 47 |             0 => None,
 48 |             n => {
 49 |                 let tokens = quote!(lex.bump_unchecked(#n););
 50 |                 self.at = 0;
 51 |                 self.available = 0;
 52 |                 self.bumped = true;
 53 |                 Some(tokens)
 54 |             }
 55 |         }
 56 |     }
 57 | 
 58 |     pub fn remainder(&self) -> usize {
 59 |         self.available.saturating_sub(self.at)
 60 |     }
 61 | 
 62 |     pub fn read_byte(&mut self) -> TokenStream {
 63 |         let at = self.at;
 64 | 
 65 |         self.advance(1);
 66 | 
 67 |         #[cfg(not(feature = "forbid_unsafe"))]
 68 |         {
 69 |             quote!(unsafe { lex.read_byte_unchecked(#at) })
 70 |         }
 71 | 
 72 |         #[cfg(feature = "forbid_unsafe")]
 73 |         {
 74 |             quote!(lex.read_byte(#at))
 75 |         }
 76 |     }
 77 | 
 78 |     pub fn read(&mut self, len: usize) -> TokenStream {
 79 |         self.available = len;
 80 | 
 81 |         match (self.at, len) {
 82 |             (0, 0) => quote!(lex.read::<u8>()),
 83 |             (a, 0) => quote!(lex.read_at::<u8>(#a)),
 84 |             (0, l) => quote!(lex.read::<&[u8; #l]>()),
 85 |             (a, l) => quote!(lex.read_at::<&[u8; #l]>(#a)),
 86 |         }
 87 |     }
 88 | 
 89 |     pub fn wipe(&mut self) {
 90 |         self.available = 0;
 91 |     }
 92 | 
 93 |     const fn backtrack(self) -> Self {
 94 |         Context {
 95 |             at: 0,
 96 |             available: 0,
 97 |             bumped: self.bumped,
 98 |             backtrack: None,
 99 |         }
100 |     }
101 | 
102 |     pub fn miss(mut self, miss: Option<NodeId>, gen: &mut Generator) -> TokenStream {
103 |         self.wipe();
104 |         match (miss, self.backtrack) {
105 |             (Some(id), _) => gen.goto(id, self).clone(),
106 |             (_, Some(id)) => gen.goto(id, self.backtrack()).clone(),
107 |             _ if self.bumped => quote!(lex.error()),
108 |             _ => quote!(_error(lex)),
109 |         }
110 |     }
111 | 
112 |     pub fn write_suffix(&self, buf: &mut String) {
113 |         use std::fmt::Write;
114 | 
115 |         if self.at > 0 {
116 |             let _ = write!(buf, "_at{}", self.at);
117 |         }
118 |         if self.available > 0 {
119 |             let _ = write!(buf, "_with{}", self.available);
120 |         }
121 |         if let Some(id) = self.backtrack {
122 |             let _ = write!(buf, "_ctx{}", id);
123 |         }
124 |         if self.bumped {
125 |             buf.push_str("_x");
126 |         }
127 |     }
128 | }
129 | 


--------------------------------------------------------------------------------
/logos-codegen/src/generator/leaf.rs:
--------------------------------------------------------------------------------
 1 | use proc_macro2::TokenStream;
 2 | use quote::quote;
 3 | 
 4 | use crate::generator::{Context, Generator};
 5 | use crate::leaf::{Callback, Leaf};
 6 | use crate::util::MaybeVoid;
 7 | 
 8 | impl Generator<'_> {
 9 |     pub fn generate_leaf(&mut self, leaf: &Leaf, mut ctx: Context) -> TokenStream {
10 |         let bump = ctx.bump();
11 | 
12 |         let ident = &leaf.ident;
13 |         let name = self.name;
14 |         let this = self.this;
15 |         let ty = &leaf.field;
16 | 
17 |         let constructor = match leaf.field {
18 |             MaybeVoid::Some(_) => quote!(#name::#ident),
19 |             MaybeVoid::Void => quote!(|()| #name::#ident),
20 |         };
21 | 
22 |         match &leaf.callback {
23 |             Some(Callback::Label(callback)) => quote! {
24 |                 #bump
25 |                 #callback(lex).construct(#constructor, lex);
26 |             },
27 |             Some(Callback::Inline(inline)) => {
28 |                 let arg = &inline.arg;
29 |                 let body = &inline.body;
30 | 
31 |                 #[cfg(not(rust_1_82))]
32 |                 let ret = quote!(impl CallbackResult<'s, #ty, #this>);
33 | 
34 |                 #[cfg(rust_1_82)]
35 |                 let ret = quote!(impl CallbackResult<'s, #ty, #this> + use<'s>);
36 | 
37 |                 quote! {
38 |                     #bump
39 | 
40 |                     #[inline]
41 |                     fn callback<'s>(#arg: &mut Lexer<'s>) -> #ret {
42 |                         #body
43 |                     }
44 | 
45 |                     callback(lex).construct(#constructor, lex);
46 |                 }
47 |             }
48 |             Some(Callback::Skip(_)) => {
49 |                 quote! {
50 |                     #bump
51 | 
52 |                     lex.trivia();
53 |                     #name::lex(lex);
54 |                 }
55 |             }
56 |             None if matches!(leaf.field, MaybeVoid::Void) => quote! {
57 |                 #bump
58 |                 lex.set(Ok(#name::#ident));
59 |             },
60 |             None => quote! {
61 |                 #bump
62 |                 let token = #name::#ident(lex.slice());
63 |                 lex.set(Ok(token));
64 |             },
65 |         }
66 |     }
67 | }
68 | 


--------------------------------------------------------------------------------
/logos-codegen/src/generator/rope.rs:
--------------------------------------------------------------------------------
 1 | use proc_macro2::TokenStream;
 2 | use quote::quote;
 3 | 
 4 | use crate::generator::{Context, Generator};
 5 | use crate::graph::Rope;
 6 | 
 7 | impl Generator<'_> {
 8 |     pub fn generate_rope(&mut self, rope: &Rope, mut ctx: Context) -> TokenStream {
 9 |         let miss = ctx.miss(rope.miss.first(), self);
10 |         let read = ctx.read(rope.pattern.len());
11 |         let then = self.goto(rope.then, ctx.advance(rope.pattern.len()));
12 | 
13 |         let pat = match rope.pattern.to_bytes() {
14 |             Some(bytes) => byte_slice_literal(&bytes),
15 |             None => {
16 |                 let ranges = rope.pattern.iter();
17 | 
18 |                 quote!([#(#ranges),*])
19 |             }
20 |         };
21 | 
22 |         quote! {
23 |             match #read {
24 |                 Some(#pat) => #then,
25 |                 _ => #miss,
26 |             }
27 |         }
28 |     }
29 | }
30 | 
31 | fn byte_slice_literal(bytes: &[u8]) -> TokenStream {
32 |     if bytes.iter().any(|&b| !(0x20..0x7F).contains(&b)) {
33 |         return quote!(&[#(#bytes),*]);
34 |     }
35 | 
36 |     let slice = std::str::from_utf8(bytes).unwrap();
37 | 
38 |     syn::parse_str(&format!("b{:?}", slice)).unwrap()
39 | }
40 | 


--------------------------------------------------------------------------------
/logos-codegen/src/generator/tables.rs:
--------------------------------------------------------------------------------
 1 | use crate::util::ToIdent;
 2 | use proc_macro2::{Literal, TokenStream};
 3 | use quote::{quote, ToTokens};
 4 | use syn::Ident;
 5 | 
 6 | pub struct TableStack {
 7 |     tables: Vec<(Ident, [u8; 256])>,
 8 |     shift: u8,
 9 | }
10 | 
11 | pub struct TableView<'a> {
12 |     ident: &'a Ident,
13 |     table: &'a mut [u8; 256],
14 |     mask: u8,
15 | }
16 | 
17 | impl TableStack {
18 |     pub fn new() -> Self {
19 |         TableStack {
20 |             tables: vec![("COMPACT_TABLE_0".to_ident(), [0; 256])],
21 |             shift: 0,
22 |         }
23 |     }
24 | 
25 |     pub fn view(&mut self) -> TableView {
26 |         let mask = if self.shift < 8 {
27 |             // Reusing existing table with a shifted mask
28 |             let mask = 1u8 << self.shift;
29 | 
30 |             self.shift += 1;
31 | 
32 |             mask
33 |         } else {
34 |             // Need to create a new table
35 |             let ident = format!("COMPACT_TABLE_{}", self.tables.len()).to_ident();
36 | 
37 |             self.tables.push((ident, [0; 256]));
38 |             self.shift = 1;
39 | 
40 |             1
41 |         };
42 | 
43 |         let (ref ident, ref mut table) = self.tables.last_mut().unwrap();
44 | 
45 |         TableView { ident, table, mask }
46 |     }
47 | }
48 | 
49 | impl<'a> TableView<'a> {
50 |     pub fn ident(&self) -> &'a Ident {
51 |         self.ident
52 |     }
53 | 
54 |     pub fn flag(&mut self, byte: u8) {
55 |         self.table[byte as usize] |= self.mask;
56 |     }
57 | 
58 |     pub fn mask(&self) -> Literal {
59 |         Literal::u8_unsuffixed(self.mask)
60 |     }
61 | }
62 | 
63 | impl ToTokens for TableStack {
64 |     fn to_tokens(&self, out: &mut TokenStream) {
65 |         if self.shift == 0 {
66 |             return;
67 |         }
68 | 
69 |         for (ident, table) in self.tables.iter() {
70 |             let bytes = table.iter().copied().map(Literal::u8_unsuffixed);
71 | 
72 |             out.extend(quote! {
73 |                 static #ident: [u8; 256] = [#(#bytes),*];
74 |             });
75 |         }
76 |     }
77 | }
78 | 


--------------------------------------------------------------------------------
/logos-codegen/src/graph/impls.rs:
--------------------------------------------------------------------------------
  1 | use std::fmt::{self, Debug, Display};
  2 | use std::hash::{Hash, Hasher};
  3 | 
  4 | use crate::graph::{Fork, Graph, Node, NodeId, Range, Rope};
  5 | 
  6 | impl<T> From<Fork> for Node<T> {
  7 |     fn from(fork: Fork) -> Self {
  8 |         Node::Fork(fork)
  9 |     }
 10 | }
 11 | impl<T> From<Rope> for Node<T> {
 12 |     fn from(rope: Rope) -> Self {
 13 |         Node::Rope(rope)
 14 |     }
 15 | }
 16 | 
 17 | fn is_ascii(byte: u8) -> bool {
 18 |     (0x20..0x7F).contains(&byte)
 19 | }
 20 | 
 21 | impl Hash for Fork {
 22 |     fn hash<H: Hasher>(&self, state: &mut H) {
 23 |         for branch in self.branches() {
 24 |             branch.hash(state);
 25 |         }
 26 |         self.miss.hash(state);
 27 |     }
 28 | }
 29 | 
 30 | impl<T> Hash for Node<T> {
 31 |     fn hash<H: Hasher>(&self, state: &mut H) {
 32 |         match self {
 33 |             Node::Rope(rope) => {
 34 |                 b"ROPE".hash(state);
 35 |                 rope.hash(state);
 36 |             }
 37 |             Node::Fork(fork) => {
 38 |                 b"FORK".hash(state);
 39 |                 fork.hash(state);
 40 |             }
 41 |             Node::Leaf(_) => b"LEAF".hash(state),
 42 |         }
 43 |     }
 44 | }
 45 | 
 46 | impl Debug for NodeId {
 47 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 48 |         Debug::fmt(&self.0, f)
 49 |     }
 50 | }
 51 | 
 52 | /// We don't need debug impls in release builds
 53 | // #[cfg(test)]
 54 | mod debug {
 55 |     use super::*;
 56 |     use crate::graph::rope::Miss;
 57 |     use crate::graph::Disambiguate;
 58 |     use std::cmp::{Ord, Ordering};
 59 | 
 60 |     impl Disambiguate for &str {
 61 |         fn cmp(left: &&str, right: &&str) -> Ordering {
 62 |             Ord::cmp(left, right)
 63 |         }
 64 |     }
 65 | 
 66 |     impl Debug for Range {
 67 |         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 68 |             let Range { start, end } = *self;
 69 | 
 70 |             if start != end || !is_ascii(start) {
 71 |                 f.write_str("[")?;
 72 |             }
 73 |             match is_ascii(start) {
 74 |                 true => write!(f, "{}", start as char),
 75 |                 false => write!(f, "{:02X}", start),
 76 |             }?;
 77 |             if start != end {
 78 |                 match is_ascii(end) {
 79 |                     true => write!(f, "-{}]", end as char),
 80 |                     false => write!(f, "-{:02X}]", end),
 81 |                 }?;
 82 |             } else if !is_ascii(start) {
 83 |                 f.write_str("]")?;
 84 |             }
 85 |             Ok(())
 86 |         }
 87 |     }
 88 | 
 89 |     impl Display for Range {
 90 |         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 91 |             <Range as Debug>::fmt(self, f)
 92 |         }
 93 |     }
 94 | 
 95 |     impl<T: Debug> Debug for Graph<T> {
 96 |         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 97 |             let entries = self
 98 |                 .nodes()
 99 |                 .iter()
100 |                 .enumerate()
101 |                 .filter_map(|(i, n)| n.as_ref().map(|n| (i, n)));
102 | 
103 |             f.debug_map().entries(entries).finish()
104 |         }
105 |     }
106 | 
107 |     struct Arm<T, U>(T, U);
108 | 
109 |     impl<T, U> Debug for Arm<T, U>
110 |     where
111 |         T: Display,
112 |         U: Display,
113 |     {
114 |         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
115 |             write!(f, "{} ⇒ {}", self.0, self.1)
116 |         }
117 |     }
118 | 
119 |     impl Debug for Fork {
120 |         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
121 |             let mut list = f.debug_set();
122 | 
123 |             for (range, then) in self.branches() {
124 |                 list.entry(&Arm(range, then));
125 |             }
126 |             if let Some(id) = self.miss {
127 |                 list.entry(&Arm('_', id));
128 |             }
129 | 
130 |             list.finish()
131 |         }
132 |     }
133 | 
134 |     impl Display for Miss {
135 |         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
136 |             match self {
137 |                 Miss::First(id) => Display::fmt(id, f),
138 |                 Miss::Any(id) => write!(f, "{}*", id),
139 |                 Miss::None => f.write_str("n/a"),
140 |             }
141 |         }
142 |     }
143 | 
144 |     impl Debug for Rope {
145 |         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
146 |             use std::fmt::Write;
147 | 
148 |             let mut rope = String::with_capacity(self.pattern.len());
149 |             for range in self.pattern.iter() {
150 |                 write!(rope, "{}", range)?;
151 |             }
152 | 
153 |             match self.miss.is_none() {
154 |                 false => {
155 |                     let mut list = f.debug_list();
156 | 
157 |                     list.entry(&Arm(rope, self.then));
158 |                     list.entry(&Arm('_', self.miss));
159 | 
160 |                     list.finish()
161 |                 }
162 |                 true => Arm(rope, self.then).fmt(f),
163 |             }
164 |         }
165 |     }
166 | 
167 |     impl PartialEq for Fork {
168 |         fn eq(&self, other: &Self) -> bool {
169 |             self.miss == other.miss && self.branches().eq(other.branches())
170 |         }
171 |     }
172 | 
173 |     impl<T: Debug> Debug for Node<T> {
174 |         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
175 |             match self {
176 |                 Node::Fork(fork) => fork.fmt(f),
177 |                 Node::Rope(rope) => rope.fmt(f),
178 |                 Node::Leaf(leaf) => leaf.fmt(f),
179 |             }
180 |         }
181 |     }
182 | 
183 |     use std::ops::RangeInclusive;
184 | 
185 |     impl From<RangeInclusive<u8>> for Range {
186 |         fn from(range: RangeInclusive<u8>) -> Range {
187 |             Range {
188 |                 start: *range.start(),
189 |                 end: *range.end(),
190 |             }
191 |         }
192 |     }
193 | 
194 |     impl From<RangeInclusive<char>> for Range {
195 |         fn from(range: RangeInclusive<char>) -> Range {
196 |             Range {
197 |                 start: *range.start() as u8,
198 |                 end: *range.end() as u8,
199 |             }
200 |         }
201 |     }
202 | 
203 |     impl<T> PartialEq<Rope> for Node<T> {
204 |         fn eq(&self, other: &Rope) -> bool {
205 |             match self {
206 |                 Node::Rope(rope) => rope == other,
207 |                 _ => false,
208 |             }
209 |         }
210 |     }
211 | 
212 |     impl<T> PartialEq<Fork> for Node<T> {
213 |         fn eq(&self, other: &Fork) -> bool {
214 |             match self {
215 |                 Node::Fork(fork) => fork == other,
216 |                 _ => false,
217 |             }
218 |         }
219 |     }
220 | }
221 | 


--------------------------------------------------------------------------------
/logos-codegen/src/graph/meta.rs:
--------------------------------------------------------------------------------
  1 | use std::cmp::min;
  2 | use std::collections::BTreeMap;
  3 | use std::ops::{Index, IndexMut};
  4 | 
  5 | use crate::graph::{Graph, Node, NodeId};
  6 | 
  7 | #[derive(Debug)]
  8 | pub struct Meta {
  9 |     map: BTreeMap<NodeId, MetaItem>,
 10 | }
 11 | 
 12 | #[derive(Debug, Default)]
 13 | pub struct MetaItem {
 14 |     /// Number of references to this node
 15 |     pub refcount: usize,
 16 |     /// Minimum number of bytes that ought to be read for this
 17 |     /// node to find a match
 18 |     pub min_read: usize,
 19 |     /// Marks whether or not this node leads to a loop entry node.
 20 |     pub is_loop_init: bool,
 21 |     /// Ids of other nodes that point to this node while this
 22 |     /// node is on a stack (creating a loop)
 23 |     pub loop_entry_from: Vec<NodeId>,
 24 | }
 25 | 
 26 | impl Index<NodeId> for Meta {
 27 |     type Output = MetaItem;
 28 | 
 29 |     fn index(&self, id: NodeId) -> &MetaItem {
 30 |         &self.map[&id]
 31 |     }
 32 | }
 33 | 
 34 | impl IndexMut<NodeId> for Meta {
 35 |     fn index_mut(&mut self, id: NodeId) -> &mut MetaItem {
 36 |         self.map.entry(id).or_default()
 37 |     }
 38 | }
 39 | 
 40 | impl MetaItem {
 41 |     fn loop_entry(&mut self, id: NodeId) {
 42 |         if let Err(idx) = self.loop_entry_from.binary_search(&id) {
 43 |             self.loop_entry_from.insert(idx, id);
 44 |         }
 45 |     }
 46 | }
 47 | 
 48 | impl Meta {
 49 |     pub fn analyze<T>(root: NodeId, graph: &Graph<T>) -> Self {
 50 |         let mut meta = Meta {
 51 |             map: Default::default(),
 52 |         };
 53 | 
 54 |         meta.first_pass(root, root, graph, &mut Vec::new());
 55 | 
 56 |         meta
 57 |     }
 58 | 
 59 |     pub fn first_pass<T>(
 60 |         &mut self,
 61 |         this: NodeId,
 62 |         parent: NodeId,
 63 |         graph: &Graph<T>,
 64 |         stack: &mut Vec<NodeId>,
 65 |     ) -> &MetaItem {
 66 |         let meta = &mut self[this];
 67 |         let is_done = meta.refcount > 0;
 68 | 
 69 |         meta.refcount += 1;
 70 | 
 71 |         if stack.contains(&this) {
 72 |             meta.loop_entry(parent);
 73 |             self[parent].is_loop_init = true;
 74 |         }
 75 |         if is_done {
 76 |             return &self[this];
 77 |         }
 78 | 
 79 |         stack.push(this);
 80 | 
 81 |         let mut min_read;
 82 | 
 83 |         match &graph[this] {
 84 |             Node::Fork(fork) => {
 85 |                 min_read = usize::MAX;
 86 |                 for (_, id) in fork.branches() {
 87 |                     let meta = self.first_pass(id, this, graph, stack);
 88 | 
 89 |                     if meta.is_loop_init {
 90 |                         min_read = 1;
 91 |                     } else {
 92 |                         min_read = min(min_read, meta.min_read + 1);
 93 |                     }
 94 |                 }
 95 |                 if let Some(id) = fork.miss {
 96 |                     let meta = self.first_pass(id, this, graph, stack);
 97 | 
 98 |                     if meta.is_loop_init {
 99 |                         min_read = 0;
100 |                     } else {
101 |                         min_read = min(min_read, meta.min_read);
102 |                     }
103 |                 }
104 |                 if min_read == usize::MAX {
105 |                     min_read = 0;
106 |                 }
107 |             }
108 |             Node::Rope(rope) => {
109 |                 min_read = rope.pattern.len();
110 |                 let meta = self.first_pass(rope.then, this, graph, stack);
111 | 
112 |                 if !meta.is_loop_init {
113 |                     min_read += meta.min_read;
114 |                 }
115 | 
116 |                 if let Some(id) = rope.miss.first() {
117 |                     let meta = self.first_pass(id, this, graph, stack);
118 | 
119 |                     if meta.is_loop_init {
120 |                         min_read = 0;
121 |                     } else {
122 |                         min_read = min(min_read, meta.min_read);
123 |                     }
124 |                 }
125 |             }
126 |             Node::Leaf(_) => min_read = 0,
127 |         }
128 | 
129 |         stack.pop();
130 | 
131 |         let meta = &mut self[this];
132 |         meta.min_read = min_read;
133 |         let second_pass = meta.loop_entry_from.clone();
134 | 
135 |         for id in second_pass {
136 |             self.meta_second_pass(id, graph);
137 |         }
138 | 
139 |         &self[this]
140 |     }
141 | 
142 |     fn meta_second_pass<T>(&mut self, id: NodeId, graph: &Graph<T>) {
143 |         let mut min_read;
144 | 
145 |         match &graph[id] {
146 |             Node::Fork(fork) => {
147 |                 min_read = usize::MAX;
148 |                 for (_, id) in fork.branches() {
149 |                     let meta = &self[id];
150 | 
151 |                     if meta.is_loop_init {
152 |                         min_read = 1;
153 |                     } else {
154 |                         min_read = min(min_read, meta.min_read + 1);
155 |                     }
156 |                 }
157 |                 if min_read == usize::MAX {
158 |                     min_read = 0;
159 |                 }
160 |             }
161 |             Node::Rope(rope) => {
162 |                 min_read = rope.pattern.len();
163 |                 let meta = &self[rope.then];
164 | 
165 |                 if !meta.is_loop_init {
166 |                     min_read += meta.min_read;
167 |                 }
168 |             }
169 |             Node::Leaf(_) => unreachable!(),
170 |         }
171 | 
172 |         self[id].min_read = min_read;
173 |     }
174 | }
175 | 


--------------------------------------------------------------------------------
/logos-codegen/src/graph/range.rs:
--------------------------------------------------------------------------------
  1 | use regex_syntax::hir::ClassBytesRange;
  2 | use regex_syntax::hir::ClassUnicodeRange;
  3 | use regex_syntax::utf8::Utf8Range;
  4 | 
  5 | use std::cmp::{Ord, Ordering};
  6 | 
  7 | #[derive(Clone, Copy, PartialEq, Eq, Hash)]
  8 | pub struct Range {
  9 |     pub start: u8,
 10 |     pub end: u8,
 11 | }
 12 | 
 13 | impl Range {
 14 |     pub fn as_byte(&self) -> Option<u8> {
 15 |         if self.is_byte() {
 16 |             Some(self.start)
 17 |         } else {
 18 |             None
 19 |         }
 20 |     }
 21 | 
 22 |     pub fn is_byte(&self) -> bool {
 23 |         self.start == self.end
 24 |     }
 25 | }
 26 | 
 27 | impl From<u8> for Range {
 28 |     fn from(byte: u8) -> Range {
 29 |         Range {
 30 |             start: byte,
 31 |             end: byte,
 32 |         }
 33 |     }
 34 | }
 35 | 
 36 | impl From<&u8> for Range {
 37 |     fn from(byte: &u8) -> Range {
 38 |         Range::from(*byte)
 39 |     }
 40 | }
 41 | 
 42 | impl Iterator for Range {
 43 |     type Item = u8;
 44 | 
 45 |     fn next(&mut self) -> Option<u8> {
 46 |         match self.start.cmp(&self.end) {
 47 |             std::cmp::Ordering::Less => {
 48 |                 let res = self.start;
 49 |                 self.start += 1;
 50 | 
 51 |                 Some(res)
 52 |             }
 53 |             std::cmp::Ordering::Equal => {
 54 |                 let res = self.start;
 55 | 
 56 |                 // Necessary so that range 0xFF-0xFF doesn't loop forever
 57 |                 self.start = 0xFF;
 58 |                 self.end = 0x00;
 59 | 
 60 |                 Some(res)
 61 |             }
 62 |             std::cmp::Ordering::Greater => None,
 63 |         }
 64 |     }
 65 | }
 66 | 
 67 | impl PartialOrd for Range {
 68 |     fn partial_cmp(&self, other: &Range) -> Option<Ordering> {
 69 |         Some(self.cmp(other))
 70 |     }
 71 | }
 72 | 
 73 | impl Ord for Range {
 74 |     fn cmp(&self, other: &Self) -> Ordering {
 75 |         self.start.cmp(&other.start)
 76 |     }
 77 | }
 78 | 
 79 | impl From<Utf8Range> for Range {
 80 |     fn from(r: Utf8Range) -> Range {
 81 |         Range {
 82 |             start: r.start,
 83 |             end: r.end,
 84 |         }
 85 |     }
 86 | }
 87 | 
 88 | impl From<ClassUnicodeRange> for Range {
 89 |     fn from(r: ClassUnicodeRange) -> Range {
 90 |         let start = r.start() as u32;
 91 |         let end = r.end() as u32;
 92 | 
 93 |         if start >= 128 || end >= 128 && end != 0x0010FFFF {
 94 |             panic!("Casting non-ascii ClassUnicodeRange to Range")
 95 |         }
 96 | 
 97 |         Range {
 98 |             start: start as u8,
 99 |             end: end as u8,
100 |         }
101 |     }
102 | }
103 | 
104 | impl From<ClassBytesRange> for Range {
105 |     fn from(r: ClassBytesRange) -> Range {
106 |         Range {
107 |             start: r.start(),
108 |             end: r.end(),
109 |         }
110 |     }
111 | }
112 | 
113 | #[cfg(test)]
114 | mod tests {
115 |     use super::*;
116 | 
117 |     #[test]
118 |     fn range_iter_one() {
119 |         let byte = Range::from(b'!');
120 |         let collected = byte.take(1000).collect::<Vec<_>>();
121 | 
122 |         assert_eq!(b"!", &collected[..]);
123 |     }
124 | 
125 |     #[test]
126 |     fn range_iter_few() {
127 |         let byte = Range {
128 |             start: b'a',
129 |             end: b'd',
130 |         };
131 |         let collected = byte.take(1000).collect::<Vec<_>>();
132 | 
133 |         assert_eq!(b"abcd", &collected[..]);
134 |     }
135 | 
136 |     #[test]
137 |     fn range_iter_bounds() {
138 |         let byte = Range::from(0xFA..=0xFF);
139 | 
140 |         let collected = byte.take(1000).collect::<Vec<_>>();
141 | 
142 |         assert_eq!(b"\xFA\xFB\xFC\xFD\xFE\xFF", &collected[..]);
143 |     }
144 | }
145 | 


--------------------------------------------------------------------------------
/logos-codegen/src/leaf.rs:
--------------------------------------------------------------------------------
  1 | use std::cmp::{Ord, Ordering};
  2 | use std::fmt::{self, Debug, Display};
  3 | 
  4 | use proc_macro2::{Span, TokenStream};
  5 | use syn::{spanned::Spanned, Ident};
  6 | 
  7 | use crate::graph::{Disambiguate, Node};
  8 | use crate::util::MaybeVoid;
  9 | 
 10 | #[derive(Clone)]
 11 | pub struct Leaf<'t> {
 12 |     pub ident: Option<&'t Ident>,
 13 |     pub span: Span,
 14 |     pub priority: usize,
 15 |     pub field: MaybeVoid,
 16 |     pub callback: Option<Callback>,
 17 | }
 18 | 
 19 | #[derive(Clone)]
 20 | pub enum Callback {
 21 |     Label(TokenStream),
 22 |     Inline(Box<InlineCallback>),
 23 |     Skip(Span),
 24 | }
 25 | 
 26 | #[derive(Clone)]
 27 | pub struct InlineCallback {
 28 |     pub arg: Ident,
 29 |     pub body: TokenStream,
 30 |     pub span: Span,
 31 | }
 32 | 
 33 | impl From<InlineCallback> for Callback {
 34 |     fn from(inline: InlineCallback) -> Callback {
 35 |         Callback::Inline(Box::new(inline))
 36 |     }
 37 | }
 38 | 
 39 | impl Callback {
 40 |     pub fn span(&self) -> Span {
 41 |         match self {
 42 |             Callback::Label(tokens) => tokens.span(),
 43 |             Callback::Inline(inline) => inline.span,
 44 |             Callback::Skip(span) => *span,
 45 |         }
 46 |     }
 47 | }
 48 | 
 49 | impl<'t> Leaf<'t> {
 50 |     pub fn new(ident: &'t Ident, span: Span) -> Self {
 51 |         Leaf {
 52 |             ident: Some(ident),
 53 |             span,
 54 |             priority: 0,
 55 |             field: MaybeVoid::Void,
 56 |             callback: None,
 57 |         }
 58 |     }
 59 | 
 60 |     pub fn new_skip(span: Span) -> Self {
 61 |         Leaf {
 62 |             ident: None,
 63 |             span,
 64 |             priority: 0,
 65 |             field: MaybeVoid::Void,
 66 |             callback: Some(Callback::Skip(span)),
 67 |         }
 68 |     }
 69 | 
 70 |     pub fn callback(mut self, callback: Option<Callback>) -> Self {
 71 |         self.callback = callback;
 72 |         self
 73 |     }
 74 | 
 75 |     pub fn field(mut self, field: MaybeVoid) -> Self {
 76 |         self.field = field;
 77 |         self
 78 |     }
 79 | 
 80 |     pub fn priority(mut self, priority: usize) -> Self {
 81 |         self.priority = priority;
 82 |         self
 83 |     }
 84 | }
 85 | 
 86 | impl Disambiguate for Leaf<'_> {
 87 |     fn cmp(left: &Leaf, right: &Leaf) -> Ordering {
 88 |         Ord::cmp(&left.priority, &right.priority)
 89 |     }
 90 | }
 91 | 
 92 | impl<'t> From<Leaf<'t>> for Node<Leaf<'t>> {
 93 |     fn from(leaf: Leaf<'t>) -> Self {
 94 |         Node::Leaf(leaf)
 95 |     }
 96 | }
 97 | 
 98 | impl Debug for Leaf<'_> {
 99 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
100 |         write!(f, "::{}", self)?;
101 | 
102 |         match self.callback {
103 |             Some(Callback::Label(ref label)) => write!(f, " ({})", label),
104 |             Some(Callback::Inline(_)) => f.write_str(" (<inline>)"),
105 |             Some(Callback::Skip(_)) => f.write_str(" (<skip>)"),
106 |             None => Ok(()),
107 |         }
108 |     }
109 | }
110 | 
111 | impl Display for Leaf<'_> {
112 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
113 |         match self.ident {
114 |             Some(ident) => Display::fmt(ident, f),
115 |             None => f.write_str("<skip>"),
116 |         }
117 |     }
118 | }
119 | 


--------------------------------------------------------------------------------
/logos-codegen/src/macros.rs:
--------------------------------------------------------------------------------
 1 | #[cfg(feature = "debug")]
 2 | macro_rules! debug {
 3 |     ($($arg:tt)*) => {
 4 |         eprint!("[{}:{}:{}] ", file!(), line!(), column!());
 5 |         eprintln!($($arg)*)
 6 |     }
 7 | }
 8 | 
 9 | #[cfg(not(feature = "debug"))]
10 | macro_rules! debug {
11 |     ($($arg:tt)*) => {};
12 | }
13 | 


--------------------------------------------------------------------------------
/logos-codegen/src/parser/definition.rs:
--------------------------------------------------------------------------------
  1 | use proc_macro2::{Ident, Span};
  2 | use syn::{spanned::Spanned, LitByteStr, LitStr};
  3 | 
  4 | use crate::error::{Errors, Result};
  5 | use crate::leaf::Callback;
  6 | use crate::mir::Mir;
  7 | use crate::parser::nested::NestedValue;
  8 | use crate::parser::{IgnoreFlags, Parser, Subpatterns};
  9 | 
 10 | use super::ignore_flags::ascii_case::MakeAsciiCaseInsensitive;
 11 | 
 12 | pub struct Definition {
 13 |     pub literal: Literal,
 14 |     pub priority: Option<usize>,
 15 |     pub callback: Option<Callback>,
 16 |     pub ignore_flags: IgnoreFlags,
 17 | }
 18 | 
 19 | pub enum Literal {
 20 |     Utf8(LitStr),
 21 |     Bytes(LitByteStr),
 22 | }
 23 | 
 24 | impl Definition {
 25 |     pub fn new(literal: Literal) -> Self {
 26 |         Definition {
 27 |             literal,
 28 |             priority: None,
 29 |             callback: None,
 30 |             ignore_flags: IgnoreFlags::Empty,
 31 |         }
 32 |     }
 33 | 
 34 |     pub fn named_attr(&mut self, name: Ident, value: NestedValue, parser: &mut Parser) {
 35 |         match (name.to_string().as_str(), value) {
 36 |             ("priority", NestedValue::Assign(tokens)) => {
 37 |                 let prio = match tokens.to_string().parse() {
 38 |                     Ok(prio) => prio,
 39 |                     Err(_) => {
 40 |                         parser.err("Expected an unsigned integer", tokens.span());
 41 |                         return;
 42 |                     }
 43 |                 };
 44 | 
 45 |                 if self.priority.replace(prio).is_some() {
 46 |                     parser.err("Resetting previously set priority", tokens.span());
 47 |                 }
 48 |             }
 49 |             ("priority", _) => {
 50 |                 parser.err("Expected: priority = <integer>", name.span());
 51 |             }
 52 |             ("callback", NestedValue::Assign(tokens)) => {
 53 |                 let span = tokens.span();
 54 |                 let callback = match parser.parse_callback(tokens) {
 55 |                     Some(callback) => callback,
 56 |                     None => {
 57 |                         parser.err("Not a valid callback", span);
 58 |                         return;
 59 |                     }
 60 |                 };
 61 | 
 62 |                 if let Some(previous) = self.callback.replace(callback) {
 63 |                     parser
 64 |                         .err(
 65 |                             "Callback has been already set",
 66 |                             span.join(name.span()).unwrap(),
 67 |                         )
 68 |                         .err("Previous callback set here", previous.span());
 69 |                 }
 70 |             }
 71 |             ("callback", _) => {
 72 |                 parser.err("Expected: callback = ...", name.span());
 73 |             }
 74 |             ("ignore", NestedValue::Group(tokens)) => {
 75 |                 self.ignore_flags.parse_group(name, tokens, parser);
 76 |             }
 77 |             ("ignore", _) => {
 78 |                 parser.err("Expected: ignore(<flag>, ...)", name.span());
 79 |             }
 80 |             (unknown, _) => {
 81 |                 parser.err(
 82 |                     format!(
 83 |                         "\
 84 |                         Unknown nested attribute: {}\n\
 85 |                         \n\
 86 |                         Expected one of: priority, callback\
 87 |                         ",
 88 |                         unknown
 89 |                     ),
 90 |                     name.span(),
 91 |                 );
 92 |             }
 93 |         }
 94 |     }
 95 | }
 96 | 
 97 | impl Literal {
 98 |     pub fn to_bytes(&self) -> Vec<u8> {
 99 |         match self {
100 |             Literal::Utf8(string) => string.value().into_bytes(),
101 |             Literal::Bytes(bytes) => bytes.value(),
102 |         }
103 |     }
104 | 
105 |     pub fn escape_regex(&self) -> Literal {
106 |         match self {
107 |             Literal::Utf8(string) => Literal::Utf8(LitStr::new(
108 |                 regex_syntax::escape(&string.value()).as_str(),
109 |                 self.span(),
110 |             )),
111 |             Literal::Bytes(bytes) => Literal::Bytes(LitByteStr::new(
112 |                 regex_syntax::escape(&bytes_to_regex_string(bytes.value())).as_bytes(),
113 |                 self.span(),
114 |             )),
115 |         }
116 |     }
117 | 
118 |     pub fn to_mir(
119 |         &self,
120 |         subpatterns: &Subpatterns,
121 |         ignore_flags: IgnoreFlags,
122 |         errors: &mut Errors,
123 |     ) -> Result<Mir> {
124 |         let value = subpatterns.fix(self, errors);
125 | 
126 |         if ignore_flags.contains(IgnoreFlags::IgnoreAsciiCase) {
127 |             match self {
128 |                 Literal::Utf8(_) => {
129 |                     Mir::utf8(&value).map(MakeAsciiCaseInsensitive::make_ascii_case_insensitive)
130 |                 }
131 |                 Literal::Bytes(_) => Mir::binary_ignore_case(&value),
132 |             }
133 |         } else if ignore_flags.contains(IgnoreFlags::IgnoreCase) {
134 |             match self {
135 |                 Literal::Utf8(_) => Mir::utf8_ignore_case(&value),
136 |                 Literal::Bytes(_) => Mir::binary_ignore_case(&value),
137 |             }
138 |         } else {
139 |             match self {
140 |                 Literal::Utf8(_) => Mir::utf8(&value),
141 |                 Literal::Bytes(_) => Mir::binary(&value),
142 |             }
143 |         }
144 |     }
145 | 
146 |     pub fn span(&self) -> Span {
147 |         match self {
148 |             Literal::Utf8(string) => string.span(),
149 |             Literal::Bytes(bytes) => bytes.span(),
150 |         }
151 |     }
152 | }
153 | 
154 | impl syn::parse::Parse for Literal {
155 |     fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
156 |         let la = input.lookahead1();
157 |         if la.peek(LitStr) {
158 |             Ok(Literal::Utf8(input.parse()?))
159 |         } else if la.peek(LitByteStr) {
160 |             Ok(Literal::Bytes(input.parse()?))
161 |         } else {
162 |             Err(la.error())
163 |         }
164 |     }
165 | }
166 | 
167 | pub fn bytes_to_regex_string(bytes: Vec<u8>) -> String {
168 |     if bytes.is_ascii() {
169 |         unsafe {
170 |             // Unicode values are prohibited, so we can't use
171 |             // safe version of String::from_utf8
172 |             //
173 |             // We can, however, construct a safe ASCII string
174 |             return String::from_utf8_unchecked(bytes);
175 |         }
176 |     }
177 | 
178 |     let mut string = String::with_capacity(bytes.len() * 2);
179 | 
180 |     for byte in bytes {
181 |         if byte < 0x80 {
182 |             string.push(byte as char);
183 |         } else {
184 |             static DIGITS: [u8; 16] = *b"0123456789abcdef";
185 | 
186 |             string.push_str(r"\x");
187 |             string.push(DIGITS[(byte / 16) as usize] as char);
188 |             string.push(DIGITS[(byte % 16) as usize] as char);
189 |         }
190 |     }
191 | 
192 |     string
193 | }
194 | 


--------------------------------------------------------------------------------
/logos-codegen/src/parser/nested.rs:
--------------------------------------------------------------------------------
  1 | use proc_macro2::token_stream::IntoIter as TokenIter;
  2 | use proc_macro2::{Ident, Literal, TokenStream, TokenTree};
  3 | use quote::quote;
  4 | 
  5 | use crate::util::{expect_punct, is_punct};
  6 | 
  7 | pub enum NestedValue {
  8 |     /// `name = ...`
  9 |     Assign(TokenStream),
 10 |     /// `name "literal"`
 11 |     Literal(Literal),
 12 |     /// `name(...)`
 13 |     Group(TokenStream),
 14 |     /// `name ident = ...`
 15 |     KeywordAssign(Ident, TokenStream),
 16 | }
 17 | 
 18 | pub enum Nested {
 19 |     /// Unnamed nested attribute, such as a string,
 20 |     /// callback closure, or a lone ident/path
 21 |     ///
 22 |     /// Note: a lone ident will be Named with no value instead
 23 |     Unnamed(TokenStream),
 24 |     /// Named: name ...
 25 |     Named(Ident, NestedValue),
 26 |     /// Unexpected token,
 27 |     Unexpected(TokenStream),
 28 | }
 29 | 
 30 | pub struct AttributeParser {
 31 |     inner: TokenIter,
 32 | }
 33 | 
 34 | pub struct Empty;
 35 | 
 36 | impl From<Empty> for TokenStream {
 37 |     fn from(_: Empty) -> TokenStream {
 38 |         TokenStream::new()
 39 |     }
 40 | }
 41 | 
 42 | impl AttributeParser {
 43 |     pub fn new(stream: TokenStream) -> Self {
 44 |         AttributeParser {
 45 |             inner: stream.into_iter(),
 46 |         }
 47 |     }
 48 | 
 49 |     pub fn parsed<T>(&mut self) -> Option<syn::Result<T>>
 50 |     where
 51 |         T: syn::parse::Parse,
 52 |     {
 53 |         let tokens = self.collect_tail(TokenStream::new());
 54 | 
 55 |         if tokens.is_empty() {
 56 |             return None;
 57 |         }
 58 | 
 59 |         Some(syn::parse2(tokens))
 60 |     }
 61 | 
 62 |     fn next_tt(&mut self) -> Option<TokenTree> {
 63 |         expect_punct(self.inner.next(), ',')
 64 |     }
 65 | 
 66 |     fn collect_tail<T>(&mut self, first: T) -> TokenStream
 67 |     where
 68 |         T: Into<TokenStream>,
 69 |     {
 70 |         let mut out = first.into();
 71 | 
 72 |         while let Some(tt) = self.next_tt() {
 73 |             out.extend(Some(tt));
 74 |         }
 75 | 
 76 |         out
 77 |     }
 78 | 
 79 |     fn parse_unnamed(&mut self, first: Ident, next: TokenTree) -> Nested {
 80 |         let mut out = TokenStream::from(TokenTree::Ident(first));
 81 | 
 82 |         out.extend(self.collect_tail(next));
 83 | 
 84 |         Nested::Unnamed(out.into_iter().collect())
 85 |     }
 86 | 
 87 |     fn parse_assign(&mut self, name: Ident) -> Nested {
 88 |         let value = self.collect_tail(Empty);
 89 | 
 90 |         Nested::Named(name, NestedValue::Assign(value))
 91 |     }
 92 | 
 93 |     fn parse_literal(&mut self, name: Ident, lit: Literal) -> Nested {
 94 |         // TODO: Error if there are any tokens following
 95 |         let _ = self.collect_tail(Empty);
 96 | 
 97 |         Nested::Named(name, NestedValue::Literal(lit))
 98 |     }
 99 | 
100 |     fn parse_group(&mut self, name: Ident, group: TokenStream) -> Nested {
101 |         Nested::Named(name, NestedValue::Group(group))
102 |     }
103 | 
104 |     fn parse_keyword(&mut self, keyword: Ident, name: Ident) -> Nested {
105 |         let error = expect_punct(self.next_tt(), '=');
106 | 
107 |         match error {
108 |             Some(error) => {
109 |                 let error = self.collect_tail(error);
110 | 
111 |                 Nested::Unexpected(error)
112 |             }
113 |             None => {
114 |                 let value = self.collect_tail(Empty);
115 | 
116 |                 Nested::Named(keyword, NestedValue::KeywordAssign(name, value))
117 |             }
118 |         }
119 |     }
120 | }
121 | 
122 | impl Iterator for AttributeParser {
123 |     type Item = Nested;
124 | 
125 |     fn next(&mut self) -> Option<Nested> {
126 |         let first = self.inner.next()?;
127 | 
128 |         let name = match first {
129 |             TokenTree::Ident(ident) => ident,
130 |             tt => {
131 |                 let stream = self.collect_tail(tt);
132 | 
133 |                 return Some(Nested::Unnamed(stream.into_iter().collect()));
134 |             }
135 |         };
136 | 
137 |         match self.next_tt() {
138 |             Some(tt) if is_punct(&tt, '=') => Some(self.parse_assign(name)),
139 |             Some(TokenTree::Literal(lit)) => Some(self.parse_literal(name, lit)),
140 |             Some(TokenTree::Group(group)) => Some(self.parse_group(name, group.stream())),
141 |             Some(TokenTree::Ident(next)) => Some(self.parse_keyword(name, next)),
142 |             Some(next) => Some(self.parse_unnamed(name, next)),
143 |             None => Some(Nested::Unnamed(quote!(#name))),
144 |         }
145 |     }
146 | }
147 | 


--------------------------------------------------------------------------------
/logos-codegen/src/parser/subpattern.rs:
--------------------------------------------------------------------------------
 1 | use proc_macro2::TokenStream;
 2 | use syn::Ident;
 3 | 
 4 | use crate::error::Errors;
 5 | use crate::mir::Mir;
 6 | use crate::parser::definition::{bytes_to_regex_string, Literal};
 7 | 
 8 | #[derive(Default)]
 9 | pub struct Subpatterns {
10 |     map: Vec<(Ident, String)>,
11 | }
12 | 
13 | impl Subpatterns {
14 |     pub fn add(&mut self, param: Ident, pattern: TokenStream, errors: &mut Errors) {
15 |         let lit = match syn::parse2::<Literal>(pattern) {
16 |             Ok(lit) => lit,
17 |             Err(e) => {
18 |                 errors.err(e.to_string(), e.span());
19 |                 return;
20 |             }
21 |         };
22 | 
23 |         if let Some((name, _)) = self.map.iter().find(|(name, _)| *name == param) {
24 |             errors
25 |                 .err(format!("{} can only be assigned once", param), param.span())
26 |                 .err("Previously assigned here", name.span());
27 |             return;
28 |         }
29 | 
30 |         let fixed = self.fix(&lit, errors);
31 | 
32 |         // Validate the literal as proper regex. If it's not, emit an error.
33 |         let mir = match &lit {
34 |             Literal::Utf8(_) => Mir::utf8(&fixed),
35 |             Literal::Bytes(_) => Mir::binary(&fixed),
36 |         };
37 | 
38 |         if let Err(err) = mir {
39 |             errors.err(err, lit.span());
40 |         };
41 | 
42 |         self.map.push((param, fixed));
43 |     }
44 | 
45 |     pub fn fix(&self, lit: &Literal, errors: &mut Errors) -> String {
46 |         let mut i = 0;
47 |         let mut pattern = match lit {
48 |             Literal::Utf8(s) => s.value(),
49 |             Literal::Bytes(b) => bytes_to_regex_string(b.value()),
50 |         };
51 | 
52 |         while let Some(f) = pattern[i..].find("(?&") {
53 |             i += f;
54 |             pattern.replace_range(i..i + 3, "(?:");
55 |             i += 3;
56 | 
57 |             let subref_end = if let Some(f) = pattern[i..].find(')') {
58 |                 i + f
59 |             } else {
60 |                 pattern.truncate(i); // truncate so latter error doesn't suppress
61 |                 break; // regex-syntax will report the unclosed group
62 |             };
63 | 
64 |             let name = &pattern[i..subref_end];
65 |             let name = match syn::parse_str::<Ident>(name) {
66 |                 Ok(name) => name,
67 |                 Err(_) => {
68 |                     errors.err(
69 |                         format!("subpattern reference `{}` is not an identifier", name),
70 |                         lit.span(),
71 |                     );
72 |                     // we emitted the error; make something up and continue
73 |                     pattern.replace_range(i..subref_end, "_");
74 |                     i += 2;
75 |                     continue;
76 |                 }
77 |             };
78 | 
79 |             match self.map.iter().find(|(def, _)| *def == name) {
80 |                 Some((_, subpattern)) => {
81 |                     pattern.replace_range(i..subref_end, subpattern);
82 |                     i += subpattern.len() + 1;
83 |                 }
84 |                 None => {
85 |                     errors.err(
86 |                         format!("subpattern reference `{}` has not been defined", name),
87 |                         lit.span(),
88 |                     );
89 |                     // leaving `(?:name)` is fine
90 |                     i = subref_end + 1;
91 |                 }
92 |             }
93 |         }
94 | 
95 |         pattern
96 |     }
97 | }
98 | 


--------------------------------------------------------------------------------
/logos-codegen/src/parser/type_params.rs:
--------------------------------------------------------------------------------
  1 | use proc_macro2::{Ident, Span, TokenStream};
  2 | use quote::quote;
  3 | use syn::spanned::Spanned;
  4 | use syn::{Lifetime, LifetimeParam, Path, Type};
  5 | 
  6 | use crate::error::Errors;
  7 | 
  8 | #[derive(Default)]
  9 | pub struct TypeParams {
 10 |     lifetime: bool,
 11 |     type_params: Vec<(Ident, Option<Type>)>,
 12 | }
 13 | 
 14 | impl TypeParams {
 15 |     pub fn explicit_lifetime(&mut self, lt: LifetimeParam, errors: &mut Errors) {
 16 |         if self.lifetime {
 17 |             let span = lt.span();
 18 | 
 19 |             errors.err("Logos types can only have one lifetime", span);
 20 |         }
 21 | 
 22 |         self.lifetime = true;
 23 |     }
 24 | 
 25 |     pub fn add(&mut self, param: Ident) {
 26 |         self.type_params.push((param, None));
 27 |     }
 28 | 
 29 |     pub fn set(&mut self, param: Ident, ty: TokenStream, errors: &mut Errors) {
 30 |         let ty = match syn::parse2::<Type>(ty) {
 31 |             Ok(mut ty) => {
 32 |                 replace_lifetimes(&mut ty);
 33 |                 ty
 34 |             }
 35 |             Err(err) => {
 36 |                 errors.err(err.to_string(), err.span());
 37 |                 return;
 38 |             }
 39 |         };
 40 | 
 41 |         match self.type_params.iter_mut().find(|(name, _)| *name == param) {
 42 |             Some((_, slot)) => {
 43 |                 if let Some(previous) = slot.replace(ty) {
 44 |                     errors
 45 |                         .err(
 46 |                             format!("{} can only have one type assigned to it", param),
 47 |                             param.span(),
 48 |                         )
 49 |                         .err("Previously assigned here", previous.span());
 50 |                 }
 51 |             }
 52 |             None => {
 53 |                 errors.err(
 54 |                     format!("{} is not a declared type parameter", param),
 55 |                     param.span(),
 56 |                 );
 57 |             }
 58 |         }
 59 |     }
 60 | 
 61 |     pub fn find(&self, path: &Path) -> Option<Type> {
 62 |         for (ident, ty) in &self.type_params {
 63 |             if path.is_ident(ident) {
 64 |                 return ty.clone();
 65 |             }
 66 |         }
 67 | 
 68 |         None
 69 |     }
 70 | 
 71 |     pub fn generics(&self, errors: &mut Errors) -> Option<TokenStream> {
 72 |         if !self.lifetime && self.type_params.is_empty() {
 73 |             return None;
 74 |         }
 75 | 
 76 |         let mut generics = Vec::new();
 77 | 
 78 |         if self.lifetime {
 79 |             generics.push(quote!('s));
 80 |         }
 81 | 
 82 |         for (ty, replace) in self.type_params.iter() {
 83 |             match replace {
 84 |                 Some(ty) => generics.push(quote!(#ty)),
 85 |                 None => {
 86 |                     errors.err(
 87 |                         format!(
 88 |                             "Generic type parameter without a concrete type\n\
 89 |                             \n\
 90 |                             Define a concrete type Logos can use: #[logos(type {} = Type)]",
 91 |                             ty,
 92 |                         ),
 93 |                         ty.span(),
 94 |                     );
 95 |                 }
 96 |             }
 97 |         }
 98 | 
 99 |         if generics.is_empty() {
100 |             None
101 |         } else {
102 |             Some(quote!(<#(#generics),*>))
103 |         }
104 |     }
105 | }
106 | 
107 | pub fn replace_lifetimes(ty: &mut Type) {
108 |     traverse_type(ty, &mut replace_lifetime)
109 | }
110 | 
111 | pub fn replace_lifetime(ty: &mut Type) {
112 |     use syn::{GenericArgument, PathArguments};
113 | 
114 |     match ty {
115 |         Type::Path(p) => {
116 |             p.path
117 |                 .segments
118 |                 .iter_mut()
119 |                 .filter_map(|segment| match &mut segment.arguments {
120 |                     PathArguments::AngleBracketed(ab) => Some(ab),
121 |                     _ => None,
122 |                 })
123 |                 .flat_map(|ab| ab.args.iter_mut())
124 |                 .for_each(|arg| {
125 |                     if let GenericArgument::Lifetime(lt) = arg {
126 |                         *lt = Lifetime::new("'s", lt.span());
127 |                     }
128 |                 });
129 |         }
130 |         Type::Reference(r) => {
131 |             let span = match r.lifetime.take() {
132 |                 Some(lt) => lt.span(),
133 |                 None => Span::call_site(),
134 |             };
135 | 
136 |             r.lifetime = Some(Lifetime::new("'s", span));
137 |         }
138 |         _ => (),
139 |     }
140 | }
141 | 
142 | pub fn traverse_type(ty: &mut Type, f: &mut impl FnMut(&mut Type)) {
143 |     f(ty);
144 |     match ty {
145 |         Type::Array(array) => traverse_type(&mut array.elem, f),
146 |         Type::BareFn(bare_fn) => {
147 |             for input in &mut bare_fn.inputs {
148 |                 traverse_type(&mut input.ty, f);
149 |             }
150 |             if let syn::ReturnType::Type(_, ty) = &mut bare_fn.output {
151 |                 traverse_type(ty, f);
152 |             }
153 |         }
154 |         Type::Group(group) => traverse_type(&mut group.elem, f),
155 |         Type::Paren(paren) => traverse_type(&mut paren.elem, f),
156 |         Type::Path(path) => traverse_path(&mut path.path, f),
157 |         Type::Ptr(p) => traverse_type(&mut p.elem, f),
158 |         Type::Reference(r) => traverse_type(&mut r.elem, f),
159 |         Type::Slice(slice) => traverse_type(&mut slice.elem, f),
160 |         Type::TraitObject(object) => object.bounds.iter_mut().for_each(|bound| {
161 |             if let syn::TypeParamBound::Trait(trait_bound) = bound {
162 |                 traverse_path(&mut trait_bound.path, f);
163 |             }
164 |         }),
165 |         Type::Tuple(tuple) => tuple
166 |             .elems
167 |             .iter_mut()
168 |             .for_each(|elem| traverse_type(elem, f)),
169 |         _ => (),
170 |     }
171 | }
172 | 
173 | fn traverse_path(path: &mut Path, f: &mut impl FnMut(&mut Type)) {
174 |     for segment in &mut path.segments {
175 |         match &mut segment.arguments {
176 |             syn::PathArguments::None => (),
177 |             syn::PathArguments::AngleBracketed(args) => {
178 |                 for arg in &mut args.args {
179 |                     match arg {
180 |                         syn::GenericArgument::Type(ty) => {
181 |                             traverse_type(ty, f);
182 |                         }
183 |                         syn::GenericArgument::AssocType(assoc) => {
184 |                             traverse_type(&mut assoc.ty, f);
185 |                         }
186 |                         _ => (),
187 |                     }
188 |                 }
189 |             }
190 |             syn::PathArguments::Parenthesized(args) => {
191 |                 for arg in &mut args.inputs {
192 |                     traverse_type(arg, f);
193 |                 }
194 |                 if let syn::ReturnType::Type(_, ty) = &mut args.output {
195 |                     traverse_type(ty, f);
196 |                 }
197 |             }
198 |         }
199 |     }
200 | }
201 | 


--------------------------------------------------------------------------------
/logos-codegen/src/util.rs:
--------------------------------------------------------------------------------
 1 | use proc_macro2::{Spacing, Span, TokenStream, TokenTree};
 2 | use quote::{quote, ToTokens};
 3 | use syn::Ident;
 4 | 
 5 | /// Analog to Option<TokenStream>, except when put into the quote!
 6 | /// macro, `MaybeVoid::Void` will produce `()`
 7 | #[derive(Clone, Default)]
 8 | pub enum MaybeVoid {
 9 |     Some(TokenStream),
10 |     #[default]
11 |     Void,
12 | }
13 | 
14 | impl MaybeVoid {
15 |     pub fn replace(&mut self, stream: TokenStream) -> MaybeVoid {
16 |         std::mem::replace(self, MaybeVoid::Some(stream))
17 |     }
18 | 
19 |     pub fn take(&mut self) -> MaybeVoid {
20 |         std::mem::replace(self, MaybeVoid::Void)
21 |     }
22 | }
23 | 
24 | impl ToTokens for MaybeVoid {
25 |     fn to_tokens(&self, out: &mut TokenStream) {
26 |         match self {
27 |             MaybeVoid::Some(stream) => out.extend(stream.clone()),
28 |             MaybeVoid::Void => out.extend(quote!(())),
29 |         }
30 |     }
31 | 
32 |     fn to_token_stream(&self) -> TokenStream {
33 |         match self {
34 |             MaybeVoid::Some(stream) => stream.clone(),
35 |             MaybeVoid::Void => quote!(()),
36 |         }
37 |     }
38 | 
39 |     fn into_token_stream(self) -> TokenStream {
40 |         match self {
41 |             MaybeVoid::Some(stream) => stream,
42 |             MaybeVoid::Void => quote!(()),
43 |         }
44 |     }
45 | }
46 | 
47 | pub fn is_punct(tt: &TokenTree, expect: char) -> bool {
48 |     matches!(tt, TokenTree::Punct(punct) if punct.as_char() == expect && punct.spacing() == Spacing::Alone)
49 | }
50 | 
51 | /// If supplied `tt` is a punct matching a char, returns `None`, else returns `tt`
52 | pub fn expect_punct(tt: Option<TokenTree>, expect: char) -> Option<TokenTree> {
53 |     tt.filter(|tt| !is_punct(tt, expect))
54 | }
55 | 
56 | pub trait ToIdent {
57 |     fn to_ident(&self) -> Ident;
58 | }
59 | 
60 | impl ToIdent for str {
61 |     fn to_ident(&self) -> Ident {
62 |         Ident::new(self, Span::call_site())
63 |     }
64 | }
65 | 


--------------------------------------------------------------------------------
/logos-codegen/tests/codegen.rs:
--------------------------------------------------------------------------------
 1 | use std::{error::Error, path::PathBuf};
 2 | 
 3 | #[rstest::rstest]
 4 | #[case("simple")]
 5 | #[case("no_error_lut")]
 6 | pub fn test_codegen(#[case] fixture: &str) -> Result<(), Box<dyn Error>> {
 7 |     let mut fixture_dir = PathBuf::new();
 8 |     fixture_dir.push(env!("CARGO_MANIFEST_DIR"));
 9 |     fixture_dir.push("tests");
10 |     fixture_dir.push("data");
11 |     fixture_dir.push(fixture);
12 | 
13 |     let input = fixture_dir.join("input.rs");
14 |     fixture_dir.push("output.rs");
15 |     let output_file = fixture_dir;
16 | 
17 |     let input = std::fs::read_to_string(input)?;
18 |     let output = std::fs::read_to_string(&output_file)?;
19 | 
20 |     let generated = logos_codegen::generate(input.parse()?);
21 |     let generated = generated.to_string();
22 | 
23 |     if std::env::var("BLESS_CODEGEN").is_ok_and(|value| value == "1") {
24 |         std::fs::write(&output_file, &generated)?;
25 |         return Ok(());
26 |     }
27 | 
28 |     assert_eq!(generated, output, "Codegen test failed: `{fixture}`, run tests again with env var `BLESS_CODEGEN=1` to bless these changes");
29 | 
30 |     Ok(())
31 | }
32 | 


--------------------------------------------------------------------------------
/logos-codegen/tests/data/no_error_lut/input.rs:
--------------------------------------------------------------------------------
 1 | #[derive(Logos)]
 2 | #[logos(source = [u8])]
 3 | enum Token {
 4 |     #[token("\n")]
 5 |     Newline,
 6 |     #[regex(".")]
 7 |     AnyUnicode,
 8 |     #[regex(b".", priority = 0)]
 9 |     Any,
10 | }
11 | 


--------------------------------------------------------------------------------
/logos-codegen/tests/data/no_error_lut/output.rs:
--------------------------------------------------------------------------------
1 | impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = [u8] ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Newline)) ; } # [inline] fn goto11_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Any)) ; } # [inline] fn goto2_ctx11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto16_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 159u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto17_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([144u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto2_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: AnyUnicode)) ; } # [inline] fn goto13_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 1usize] > () { Some ([128u8 ..= 191u8]) => { lex . bump_unchecked (1usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto18_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto15_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto14_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 2usize] > () { Some ([160u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (2usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto19_ctx11_x < 's > (lex : & mut Lexer < 's >) { match lex . read :: < & [u8 ; 3usize] > () { Some ([128u8 ..= 143u8 , 128u8 ..= 191u8 , 128u8 ..= 191u8]) => { lex . bump_unchecked (3usize) ; goto2_ctx11_x (lex) } , _ => goto11_ctx11_x (lex) , } } # [inline] fn goto11_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Any)) ; } # [inline] fn goto20 < 's > (lex : & mut Lexer < 's >) { enum Jump { J1 , J16 , J17 , J2 , J13 , J18 , J15 , J14 , J19 , J11 , } const LUT : [Jump ; 256] = { use Jump :: * ; [J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J1 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J2 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J13 , J14 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J15 , J16 , J15 , J15 , J17 , J18 , J18 , J18 , J19 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11 , J11] } ; let byte = match lex . read :: < u8 > () { Some (byte) => byte , None => return _end (lex) , } ; match LUT [byte as usize] { Jump :: J1 => { lex . bump_unchecked (1usize) ; goto1_x (lex) } , Jump :: J16 => { lex . bump_unchecked (1usize) ; goto16_ctx11_x (lex) } , Jump :: J17 => { lex . bump_unchecked (1usize) ; goto17_ctx11_x (lex) } , Jump :: J2 => { lex . bump_unchecked (1usize) ; goto2_x (lex) } , Jump :: J13 => { lex . bump_unchecked (1usize) ; goto13_ctx11_x (lex) } , Jump :: J18 => { lex . bump_unchecked (1usize) ; goto18_ctx11_x (lex) } , Jump :: J15 => { lex . bump_unchecked (1usize) ; goto15_ctx11_x (lex) } , Jump :: J14 => { lex . bump_unchecked (1usize) ; goto14_ctx11_x (lex) } , Jump :: J19 => { lex . bump_unchecked (1usize) ; goto19_ctx11_x (lex) } , Jump :: J11 => { lex . bump_unchecked (1usize) ; goto11_x (lex) } , } } goto20 (lex) } }


--------------------------------------------------------------------------------
/logos-codegen/tests/data/simple/input.rs:
--------------------------------------------------------------------------------
1 | #[derive(Logos, Debug, Clone, Copy, PartialEq)]
2 | enum Token {
3 |     #[regex("a-z")]
4 |     Letter,
5 | }
6 | 


--------------------------------------------------------------------------------
/logos-codegen/tests/data/simple/output.rs:
--------------------------------------------------------------------------------
1 | impl < 's > :: logos :: Logos < 's > for Token { type Error = () ; type Extras = () ; type Source = str ; fn lex (lex : & mut :: logos :: Lexer < 's , Self >) { use :: logos :: internal :: { LexerInternal , CallbackResult } ; type Lexer < 's > = :: logos :: Lexer < 's , Token > ; fn _end < 's > (lex : & mut Lexer < 's >) { lex . end () } fn _error < 's > (lex : & mut Lexer < 's >) { lex . bump_unchecked (1) ; lex . error () ; } macro_rules ! _fast_loop { ($ lex : ident , $ test : ident , $ miss : expr) => { while let Some (arr) = $ lex . read :: < & [u8 ; 16] > () { if $ test (arr [0]) { if $ test (arr [1]) { if $ test (arr [2]) { if $ test (arr [3]) { if $ test (arr [4]) { if $ test (arr [5]) { if $ test (arr [6]) { if $ test (arr [7]) { if $ test (arr [8]) { if $ test (arr [9]) { if $ test (arr [10]) { if $ test (arr [11]) { if $ test (arr [12]) { if $ test (arr [13]) { if $ test (arr [14]) { if $ test (arr [15]) { $ lex . bump_unchecked (16) ; continue ; } $ lex . bump_unchecked (15) ; return $ miss ; } $ lex . bump_unchecked (14) ; return $ miss ; } $ lex . bump_unchecked (13) ; return $ miss ; } $ lex . bump_unchecked (12) ; return $ miss ; } $ lex . bump_unchecked (11) ; return $ miss ; } $ lex . bump_unchecked (10) ; return $ miss ; } $ lex . bump_unchecked (9) ; return $ miss ; } $ lex . bump_unchecked (8) ; return $ miss ; } $ lex . bump_unchecked (7) ; return $ miss ; } $ lex . bump_unchecked (6) ; return $ miss ; } $ lex . bump_unchecked (5) ; return $ miss ; } $ lex . bump_unchecked (4) ; return $ miss ; } $ lex . bump_unchecked (3) ; return $ miss ; } $ lex . bump_unchecked (2) ; return $ miss ; } $ lex . bump_unchecked (1) ; return $ miss ; } return $ miss ; } while $ lex . test ($ test) { $ lex . bump_unchecked (1) ; } $ miss } ; } # [inline] fn goto1_x < 's > (lex : & mut Lexer < 's >) { lex . set (Ok (Token :: Letter)) ; } # [inline] fn goto3_at1_with3 < 's > (lex : & mut Lexer < 's >) { match lex . read_at :: < & [u8 ; 2usize] > (1usize) { Some (b"-z") => { lex . bump_unchecked (3usize) ; goto1_x (lex) } , _ => _error (lex) , } } # [inline] fn goto4 < 's > (lex : & mut Lexer < 's >) { let arr = match lex . read :: < & [u8 ; 3usize] > () { Some (arr) => arr , None => return _end (lex) , } ; match arr [0] { b'a' => goto3_at1_with3 (lex) , _ => _error (lex) , } } goto4 (lex) } }


--------------------------------------------------------------------------------
/logos-derive/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [dependencies]
 2 | logos-codegen = {version = "0.15.0", path = "../logos-codegen"}
 3 | 
 4 | [features]
 5 | # Enables debug messages
 6 | debug = ["logos-codegen/debug"]
 7 | # Don't use or generate unsafe code
 8 | forbid_unsafe = ["logos-codegen/forbid_unsafe"]
 9 | 
10 | [lib]
11 | bench = false
12 | proc-macro = true
13 | 
14 | [package]
15 | name = "logos-derive"
16 | authors.workspace = true
17 | categories.workspace = true
18 | description.workspace = true
19 | edition.workspace = true
20 | homepage.workspace = true
21 | keywords.workspace = true
22 | license.workspace = true
23 | readme.workspace = true
24 | repository.workspace = true
25 | rust-version.workspace = true
26 | version.workspace = true
27 | 
28 | [package.metadata.release]
29 | shared-version = true
30 | 


--------------------------------------------------------------------------------
/logos-derive/LICENSE-APACHE:
--------------------------------------------------------------------------------
1 | ../LICENSE-APACHE


--------------------------------------------------------------------------------
/logos-derive/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | ../LICENSE-MIT


--------------------------------------------------------------------------------
/logos-derive/src/lib.rs:
--------------------------------------------------------------------------------
1 | use proc_macro::TokenStream;
2 | 
3 | #[proc_macro_derive(Logos, attributes(logos, extras, error, end, token, regex))]
4 | pub fn logos(input: TokenStream) -> TokenStream {
5 |     logos_codegen::generate(input.into()).into()
6 | }
7 | 


--------------------------------------------------------------------------------
/logos.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/maciejhirsz/logos/96765c0be188f3e8005b48db82bf1e904d2e6650/logos.png


--------------------------------------------------------------------------------
/release.toml:
--------------------------------------------------------------------------------
1 | pre-release-commit-message = "chore(version): bump logos version to {{version}}"
2 | push = false
3 | tag = false
4 | 


--------------------------------------------------------------------------------
/tests/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | edition.workspace = true
 3 | name = "tests"
 4 | publish = false
 5 | rust-version.workspace = true
 6 | version = "0.0.0"
 7 | 
 8 | [dependencies]
 9 | logos-derive = {path = "../logos-derive"}
10 | logos = {path = "../", default-features = false, features = ["std"]}
11 | 
12 | [features]
13 | forbid_unsafe = [
14 |     "logos-derive/forbid_unsafe",
15 |     "logos/forbid_unsafe"
16 | ]
17 | 
18 | [dev-dependencies]
19 | criterion = { version = "2.10.1", package = "codspeed-criterion-compat" }
20 | 
21 | [package.metadata.release]
22 | release = false
23 | 
24 | [[bench]]
25 | harness = false
26 | name = "bench"
27 | 


--------------------------------------------------------------------------------
/tests/benches/bench.rs:
--------------------------------------------------------------------------------
  1 | use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
  2 | use logos_derive::Logos;
  3 | 
  4 | #[derive(Debug, Clone, Copy, PartialEq, Logos)]
  5 | pub enum Token {
  6 |     #[regex(r"[ \n\t\f]", logos::skip)]
  7 |     InvalidToken,
  8 | 
  9 |     #[regex("[a-zA-Z_$][a-zA-Z0-9_$]*")]
 10 |     Identifier,
 11 | 
 12 |     #[regex(r#""([^"\\]|\\t|\\u|\\n|\\")*""#)]
 13 |     String,
 14 | 
 15 |     #[token("private")]
 16 |     Private,
 17 | 
 18 |     #[token("primitive")]
 19 |     Primitive,
 20 | 
 21 |     #[token("protected")]
 22 |     Protected,
 23 | 
 24 |     #[token("in")]
 25 |     In,
 26 | 
 27 |     #[token("instanceof")]
 28 |     Instanceof,
 29 | 
 30 |     #[token(".")]
 31 |     Accessor,
 32 | 
 33 |     #[token("...")]
 34 |     Ellipsis,
 35 | 
 36 |     #[token("(")]
 37 |     ParenOpen,
 38 | 
 39 |     #[token(")")]
 40 |     ParenClose,
 41 | 
 42 |     #[token("{")]
 43 |     BraceOpen,
 44 | 
 45 |     #[token("}")]
 46 |     BraceClose,
 47 | 
 48 |     #[token("+")]
 49 |     OpAddition,
 50 | 
 51 |     #[token("++")]
 52 |     OpIncrement,
 53 | 
 54 |     #[token("=")]
 55 |     OpAssign,
 56 | 
 57 |     #[token("==")]
 58 |     OpEquality,
 59 | 
 60 |     #[token("===")]
 61 |     OpStrictEquality,
 62 | 
 63 |     #[token("=>")]
 64 |     FatArrow,
 65 | }
 66 | 
 67 | static SOURCE: &str = "
 68 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 69 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 70 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 71 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 72 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 73 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 74 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 75 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 76 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 77 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 78 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 79 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 80 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 81 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 82 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 83 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 84 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 85 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 86 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 87 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 88 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 89 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 90 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 91 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 92 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 93 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 94 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 95 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 96 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 97 | foobar(protected primitive private instanceof in) { + ++ = == === => }
 98 | ";
 99 | 
100 | static IDENTIFIERS: &str = "It was the year when they finally immanentized the Eschaton \
101 |                             It was the year when they finally immanentized the Eschaton \
102 |                             It was the year when they finally immanentized the Eschaton \
103 |                             It was the year when they finally immanentized the Eschaton \
104 |                             It was the year when they finally immanentized the Eschaton \
105 |                             It was the year when they finally immanentized the Eschaton \
106 |                             It was the year when they finally immanentized the Eschaton \
107 |                             It was the year when they finally immanentized the Eschaton \
108 |                             It was the year when they finally immanentized the Eschaton \
109 |                             It was the year when they finally immanentized the Eschaton \
110 |                             It was the year when they finally immanentized the Eschaton \
111 |                             It was the year when they finally immanentized the Eschaton \
112 |                             It was the year when they finally immanentized the Eschaton";
113 | 
114 | static STRINGS: &str = r#""tree" "to" "a" "graph" "that can" "more adequately represent" "loops and arbitrary state jumps" "with\"\"\"out" "the\n\n\n\n\n" "expl\"\"\"osive" "nature\"""of trying to build up all possible permutations in a tree." "tree" "to" "a" "graph" "that can" "more adequately represent" "loops and arbitrary state jumps" "with\"\"\"out" "the\n\n\n\n\n" "expl\"\"\"osive" "nature\"""of trying to build up all possible permutations in a tree." "tree" "to" "a" "graph" "that can" "more adequately represent" "loops and arbitrary state jumps" "with\"\"\"out" "the\n\n\n\n\n" "expl\"\"\"osive" "nature\"""of trying to build up all possible permutations in a tree." "tree" "to" "a" "graph" "that can" "more adequately represent" "loops and arbitrary state jumps" "with\"\"\"out" "the\n\n\n\n\n" "expl\"\"\"osive" "nature\"""of trying to build up all possible permutations in a tree.""#;
115 | 
116 | static CANDIDATES: [(&str, &str); 3] = [
117 |     ("identifiers", IDENTIFIERS),
118 |     ("keywords_operators_and_punctators", SOURCE),
119 |     ("strings", STRINGS),
120 | ];
121 | 
122 | #[allow(unused_must_use)]
123 | fn iterate(s: &str) {
124 |     use logos::Logos;
125 | 
126 |     let mut lex = Token::lexer(s);
127 | 
128 |     while let Some(token) = lex.next() {
129 |         black_box(token);
130 |     }
131 | }
132 | 
133 | fn count_ok(s: &str) -> usize {
134 |     use logos::Logos;
135 | 
136 |     Token::lexer(s).filter_map(|res| res.ok()).count()
137 | }
138 | 
139 | fn bench_iterate(c: &mut Criterion) {
140 |     let mut group = c.benchmark_group("iterate");
141 | 
142 |     for (name, source) in CANDIDATES {
143 |         group.throughput(Throughput::Bytes(source.len() as u64));
144 |         group.bench_with_input(name, &source, |b, &s| b.iter(|| iterate(s)));
145 |     }
146 | }
147 | 
148 | fn bench_count_ok(c: &mut Criterion) {
149 |     let mut group = c.benchmark_group("count_ok");
150 | 
151 |     for (name, source) in CANDIDATES {
152 |         group.throughput(Throughput::Bytes(source.len() as u64));
153 |         group.bench_with_input(name, &source, |b, &s| b.iter(|| count_ok(s)));
154 |     }
155 | }
156 | 
157 | criterion_group!(benches, bench_iterate, bench_count_ok);
158 | criterion_main!(benches);
159 | 


--------------------------------------------------------------------------------
/tests/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! ```compile_fail
  2 | //! use logos::Logos;
  3 | //! use logos_derive::Logos;
  4 | //!
  5 | //! #[derive(Logos)]
  6 | //! enum Token {
  7 | //!     #[token(b"\xFF")]
  8 | //!     NonUtf8,
  9 | //! }
 10 | //!
 11 | //! Token::lexer("This shouldn't work with a string literal!");
 12 | //! ```
 13 | //!
 14 | //! Same, but with regex:
 15 | //!
 16 | //! ```compile_fail
 17 | //! use logos::Logos;
 18 | //! use logos_derive::Logos;
 19 | //!
 20 | //! #[derive(Logos)]
 21 | //! enum Token {
 22 | //!     #[regex(b"\xFF")]
 23 | //!     NonUtf8,
 24 | //! }
 25 | //!
 26 | //! Token::lexer("This shouldn't work with a string literal!");
 27 | //! ```
 28 | //!
 29 | //! Matching against .* (or .+) should fail to compile:
 30 | //!
 31 | //! ```compile_fail
 32 | //! use logos::Logos;
 33 | //! use logos_derive::Logos;
 34 | //!
 35 | //! #[derive(Logos, Debug, PartialEq)]
 36 | //! enum Token {
 37 | //!     #[regex(r"\(.*\)")]
 38 | //!     BetweenParen,
 39 | //!
 40 | //! }
 41 | //! ```
 42 | //!
 43 | //! ```compile_fail
 44 | //! use logos::Logos;
 45 | //! use logos_derive::Logos;
 46 | //!
 47 | //! #[derive(Logos, Debug, PartialEq)]
 48 | //! enum Token {
 49 | //!     #[regex(r"\(.+\)")]
 50 | //!     BetweenParen,
 51 | //!
 52 | //! }
 53 | //! ```
 54 | //!
 55 | //! And also when working with bytes:
 56 | //!
 57 | //! ```compile_fail
 58 | //! use logos::Logos;
 59 | //! use logos_derive::Logos;
 60 | //!
 61 | //! #[derive(Logos, Debug, PartialEq)]
 62 | //! enum Token {
 63 | //!     #[regex(b"\x00.*")]
 64 | //!     NonUtf8,
 65 | //!
 66 | //! }
 67 | //! ```
 68 | //!
 69 | //! ```compile_fail
 70 | //! use logos::Logos;
 71 | //! use logos_derive::Logos;
 72 | //!
 73 | //! #[derive(Logos, Debug, PartialEq)]
 74 | //! enum Token {
 75 | //!     #[regex(b"\x00.+")]
 76 | //!     NonUtf8,
 77 | //!
 78 | //! }
 79 | //! ```
 80 | use logos::source::Source;
 81 | use logos::Logos;
 82 | 
 83 | use std::fmt;
 84 | use std::ops::Range;
 85 | 
 86 | #[allow(clippy::type_complexity)]
 87 | pub fn assert_lex<'a, Token>(
 88 |     source: &'a Token::Source,
 89 |     tokens: &[(
 90 |         Result<Token, Token::Error>,
 91 |         <Token::Source as Source>::Slice<'a>,
 92 |         Range<usize>,
 93 |     )],
 94 | ) where
 95 |     Token: Logos<'a> + fmt::Debug + PartialEq,
 96 |     Token::Extras: Default,
 97 | {
 98 |     let mut lex = Token::lexer(source);
 99 | 
100 |     for tuple in tokens {
101 |         assert_eq!(
102 |             &(lex.next().expect("Unexpected end"), lex.slice(), lex.span()),
103 |             tuple
104 |         );
105 |     }
106 | 
107 |     assert_eq!(lex.next(), None);
108 | }
109 | 


--------------------------------------------------------------------------------
/tests/tests/binary.rs:
--------------------------------------------------------------------------------
 1 | use logos_derive::Logos;
 2 | use tests::assert_lex;
 3 | 
 4 | #[derive(Logos, Debug, Clone, Copy, PartialEq)]
 5 | enum Token {
 6 |     #[token("foo")]
 7 |     Foo,
 8 | 
 9 |     #[regex(b"\x42+")]
10 |     Life,
11 | 
12 |     #[regex(b"[\xA0-\xAF]+")]
13 |     Aaaaaaa,
14 | 
15 |     #[token(b"\xCA\xFE\xBE\xEF")]
16 |     CafeBeef,
17 | 
18 |     #[token(b"\x00")]
19 |     Zero,
20 | }
21 | 
22 | #[test]
23 | fn handles_non_utf8() {
24 |     assert_lex(
25 |         &[
26 |             0, 0, 0xCA, 0xFE, 0xBE, 0xEF, b'f', b'o', b'o', 0x42, 0x42, 0x42, 0xAA, 0xAA, 0xA2,
27 |             0xAE, 0x10, 0x20, 0,
28 |         ][..],
29 |         &[
30 |             (Ok(Token::Zero), &[0], 0..1),
31 |             (Ok(Token::Zero), &[0], 1..2),
32 |             (Ok(Token::CafeBeef), &[0xCA, 0xFE, 0xBE, 0xEF], 2..6),
33 |             (Ok(Token::Foo), b"foo", 6..9),
34 |             (Ok(Token::Life), &[0x42, 0x42, 0x42], 9..12),
35 |             (Ok(Token::Aaaaaaa), &[0xAA, 0xAA, 0xA2, 0xAE], 12..16),
36 |             (Err(()), &[0x10], 16..17),
37 |             (Err(()), &[0x20], 17..18),
38 |             (Ok(Token::Zero), &[0], 18..19),
39 |         ],
40 |     );
41 | }
42 | 


--------------------------------------------------------------------------------
/tests/tests/callbacks.rs:
--------------------------------------------------------------------------------
  1 | use logos::{Lexer, Logos as _, Skip};
  2 | use logos_derive::Logos;
  3 | use tests::assert_lex;
  4 | 
  5 | #[derive(Default, Debug, Clone, PartialEq)]
  6 | enum LexingError {
  7 |     ParseNumberError,
  8 |     #[default]
  9 |     Other,
 10 | }
 11 | 
 12 | impl From<std::num::ParseIntError> for LexingError {
 13 |     fn from(_: std::num::ParseIntError) -> Self {
 14 |         LexingError::ParseNumberError
 15 |     }
 16 | }
 17 | 
 18 | impl From<std::num::ParseFloatError> for LexingError {
 19 |     fn from(_: std::num::ParseFloatError) -> Self {
 20 |         LexingError::ParseNumberError
 21 |     }
 22 | }
 23 | 
 24 | mod data {
 25 |     use super::*;
 26 | 
 27 |     #[derive(Logos, Debug, PartialEq)]
 28 |     #[logos(error = LexingError)]
 29 |     #[logos(skip r"[ \t\n\f]+")]
 30 |     enum Token<'a> {
 31 |         #[regex(r"[a-zA-Z]+", |lex| lex.slice())]
 32 |         Text(&'a str),
 33 | 
 34 |         #[regex(r"-?[0-9]+", |lex| lex.slice().parse())]
 35 |         Integer(i64),
 36 | 
 37 |         #[regex(r"-?[0-9]+\.[0-9]+", |lex| lex.slice().parse())]
 38 |         Float(f64),
 39 |     }
 40 | 
 41 |     #[test]
 42 |     fn numbers() {
 43 |         let tokens: Vec<_> = Token::lexer("Hello 1 42 -100 pi 3.14 -77.77").collect();
 44 | 
 45 |         assert_eq!(
 46 |             tokens,
 47 |             &[
 48 |                 Ok(Token::Text("Hello")),
 49 |                 Ok(Token::Integer(1)),
 50 |                 Ok(Token::Integer(42)),
 51 |                 Ok(Token::Integer(-100)),
 52 |                 Ok(Token::Text("pi")),
 53 |                 Ok(Token::Float(3.14)),
 54 |                 Ok(Token::Float(-77.77)),
 55 |             ]
 56 |         );
 57 |     }
 58 | }
 59 | 
 60 | mod nested_lifetime {
 61 |     use super::*;
 62 |     use std::borrow::Cow;
 63 | 
 64 |     #[derive(Logos, Debug, PartialEq)]
 65 |     #[logos(error = LexingError)]
 66 |     #[logos(skip r"[ \t\n\f]+")]
 67 |     enum Token<'a> {
 68 |         #[regex(r"[0-9]+", |lex| {
 69 |             let slice = lex.slice();
 70 | 
 71 |             slice.parse::<u64>().map(|n| {
 72 |                 (slice, n)
 73 |             })
 74 |         })]
 75 |         Integer((&'a str, u64)),
 76 | 
 77 |         #[regex(r"[a-z]+", |lex| Cow::Borrowed(lex.slice()))]
 78 |         Text(Cow<'a, str>),
 79 |     }
 80 | 
 81 |     #[test]
 82 |     fn supplement_lifetime_in_types() {
 83 |         let tokens: Vec<_> = Token::lexer("123 hello 42").collect();
 84 | 
 85 |         assert_eq!(
 86 |             tokens,
 87 |             &[
 88 |                 Ok(Token::Integer(("123", 123))),
 89 |                 Ok(Token::Text(Cow::Borrowed("hello"))),
 90 |                 Ok(Token::Integer(("42", 42))),
 91 |             ],
 92 |         );
 93 |     }
 94 | }
 95 | 
 96 | mod rust {
 97 |     use super::*;
 98 | 
 99 |     /// Adaptation of implementation by matklad:
100 |     /// https://github.com/matklad/fall/blob/527ab331f82b8394949041bab668742868c0c282/lang/rust/syntax/src/rust.fall#L1294-L1324
101 |     fn parse_raw_string(lexer: &mut Lexer<Token>) -> bool {
102 |         // Who needs more then 25 hashes anyway? :)
103 |         let q_hashes = concat!('"', "######", "######", "######", "######", "######");
104 |         let closing = &q_hashes[..lexer.slice().len() - 1]; // skip initial 'r'
105 | 
106 |         lexer
107 |             .remainder()
108 |             .find(closing)
109 |             .map(|i| lexer.bump(i + closing.len()))
110 |             .is_some()
111 |     }
112 | 
113 |     #[derive(Logos, Debug, Clone, Copy, PartialEq)]
114 |     #[logos(error = LexingError)]
115 |     #[logos(skip r"[ \t\n\f]+")]
116 |     enum Token {
117 |         #[regex("[a-zA-Z_][a-zA-Z0-9_]*")]
118 |         Ident,
119 | 
120 |         #[regex("r#*\"", parse_raw_string)]
121 |         RawString,
122 |     }
123 | 
124 |     #[test]
125 |     fn raw_strings() {
126 |         assert_lex(
127 |             " r\"foo\" r#\"bar\"# r#####\"baz\"##### r###\"error\"## ",
128 |             &[
129 |                 (Ok(Token::RawString), "r\"foo\"", 1..7),
130 |                 (Ok(Token::RawString), "r#\"bar\"#", 8..16),
131 |                 (Ok(Token::RawString), "r#####\"baz\"#####", 17..33),
132 |                 (Err(LexingError::Other), "r###\"", 34..39),
133 |                 (Ok(Token::Ident), "error", 39..44),
134 |                 (Err(LexingError::Other), "\"", 44..45),
135 |                 (Err(LexingError::Other), "#", 45..46),
136 |                 (Err(LexingError::Other), "#", 46..47),
137 |             ],
138 |         );
139 |     }
140 | }
141 | 
142 | mod any_token_callback {
143 |     use super::*;
144 | 
145 |     // Adaption of data test for (_) -> Token callbacks
146 |     #[derive(Logos, Debug, PartialEq)]
147 |     #[logos(skip r"[ \t\n\f]+")]
148 |     enum Token {
149 |         #[regex(r"[a-zA-Z]+", |_| Token::Text)]
150 |         #[regex(r"-?[0-9]+", |_| Token::Integer)]
151 |         #[regex(r"-?[0-9]+\.[0-9]+", |_| Token::Float)]
152 |         Text,
153 |         Integer,
154 |         Float,
155 |     }
156 | 
157 |     #[test]
158 |     fn any_token_callback() {
159 |         let tokens: Vec<_> = Token::lexer("Hello 1 42 -100 pi 3.14 -77.77").collect();
160 | 
161 |         assert_eq!(
162 |             tokens,
163 |             &[
164 |                 Ok(Token::Text),
165 |                 Ok(Token::Integer),
166 |                 Ok(Token::Integer),
167 |                 Ok(Token::Integer),
168 |                 Ok(Token::Text),
169 |                 Ok(Token::Float),
170 |                 Ok(Token::Float),
171 |             ]
172 |         );
173 |     }
174 | }
175 | 
176 | mod return_result_skip {
177 |     use super::*;
178 | 
179 |     #[derive(Debug, Default, PartialEq, Clone)]
180 |     enum LexerError {
181 |         UnterminatedComment,
182 |         #[default]
183 |         Other,
184 |     }
185 | 
186 |     #[derive(Logos, Debug, PartialEq)]
187 |     #[logos(skip r"[ \t\n\f]+")]
188 |     #[logos(error = LexerError)]
189 |     enum Token<'src> {
190 |         #[regex(r"<[a-zA-Z0-9-]+>", |lex| &lex.slice()[1..lex.slice().len()-1])]
191 |         Tag(&'src str),
192 | 
193 |         #[token("<!--", skip_comment)]
194 |         Comment,
195 |     }
196 | 
197 |     fn skip_comment<'src>(lexer: &mut Lexer<'src, Token<'src>>) -> Result<Skip, LexerError> {
198 |         let end = lexer
199 |             .remainder()
200 |             .find("-->")
201 |             .ok_or(LexerError::UnterminatedComment)?;
202 |         lexer.bump(end + 3);
203 | 
204 |         Ok(Skip)
205 |     }
206 | 
207 |     #[test]
208 |     fn return_result_skip() {
209 |         let mut lexer = Token::lexer("<foo> <!-- comment --> <bar>");
210 |         assert_eq!(lexer.next(), Some(Ok(Token::Tag("foo"))));
211 |         assert_eq!(lexer.next(), Some(Ok(Token::Tag("bar"))));
212 |         assert_eq!(lexer.next(), None);
213 | 
214 |         let mut lexer = Token::lexer("<foo> <!-- unterminated comment");
215 |         assert_eq!(lexer.next(), Some(Ok(Token::Tag("foo"))));
216 |         assert_eq!(lexer.next(), Some(Err(LexerError::UnterminatedComment)));
217 |     }
218 | }
219 | 


--------------------------------------------------------------------------------
/tests/tests/clone.rs:
--------------------------------------------------------------------------------
 1 | use std::cell::Cell;
 2 | 
 3 | use logos::Logos as _;
 4 | use logos_derive::Logos;
 5 | 
 6 | #[derive(Logos, Clone, Debug, PartialEq)]
 7 | pub enum Token {
 8 |     #[token("a", |_| Evil::default())]
 9 |     Evil(Evil),
10 | }
11 | 
12 | #[derive(Debug, Default, PartialEq)]
13 | pub struct Evil(Box<Cell<u8>>);
14 | 
15 | impl Clone for Evil {
16 |     fn clone(&self) -> Self {
17 |         self.0.set(self.0.get() + 1);
18 |         Self::default()
19 |     }
20 | }
21 | 
22 | #[test]
23 | fn test_it_works_without_cloning() {
24 |     let mut lexer = Token::lexer("aaa");
25 |     assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default()))));
26 |     assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default()))));
27 |     assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default()))));
28 |     assert_eq!(lexer.next(), None);
29 | }
30 | 
31 | #[test]
32 | fn test_clone_ub() {
33 |     let mut lexer = Token::lexer("a");
34 |     assert_eq!(lexer.next(), Some(Ok(Token::Evil(Evil::default()))));
35 | 
36 |     // In logos 0.14.1, this causes use-after-free (UB),
37 |     // because `Clone` dereferences the value returned by the last call to `lexer.next()`,
38 |     // which got deallocated.
39 |     // A real-life example where this could happen is with `Rc`.
40 |     // Note that it may still pass `cargo test`, it will fail with Miri.
41 |     let mut lexer2 = lexer.clone();
42 | 
43 |     assert_eq!(lexer2.next(), None);
44 | }
45 | 
46 | #[test]
47 | fn test_clone_leak() {
48 |     let mut lexer = Token::lexer("a");
49 |     let Some(Ok(Token::Evil(evil))) = lexer.next() else {
50 |         panic!("Expected Token::Evil");
51 |     };
52 |     assert_eq!(evil.0.get(), 0);
53 | 
54 |     // In logos 0.14.1, this causes a memory leak because `evil` is cloned with `lexer`.
55 |     // This produces `evil.0.get() == 1`. It will fail even on `cargo test`.
56 |     let mut lexer2 = lexer.clone();
57 |     assert_eq!(evil.0.get(), 0);
58 | 
59 |     assert_eq!(lexer2.next(), None);
60 |     let _ = evil.clone();
61 |     assert_eq!(evil.0.get(), 1);
62 | }
63 | 


--------------------------------------------------------------------------------
/tests/tests/crate_.rs:
--------------------------------------------------------------------------------
 1 | use logos_derive::Logos;
 2 | use tests::assert_lex;
 3 | 
 4 | mod some {
 5 |     pub mod path {
 6 |         pub use logos as _logos;
 7 |     }
 8 | }
 9 | 
10 | #[derive(Logos, Debug, Clone, Copy, PartialEq)]
11 | #[logos(crate = some::path::_logos)]
12 | #[logos(skip r"[ \t\n\f]+")]
13 | enum Token {
14 |     #[regex("-?[0-9]+")]
15 |     LiteralInteger,
16 | 
17 |     #[token("'")]
18 |     SingleQuote,
19 | }
20 | 
21 | #[test]
22 | fn simple() {
23 |     assert_lex(
24 |         "' -1'2  '",
25 |         &[
26 |             (Ok(Token::SingleQuote), "'", 0..1),
27 |             (Ok(Token::LiteralInteger), "-1", 2..4),
28 |             (Ok(Token::SingleQuote), "'", 4..5),
29 |             (Ok(Token::LiteralInteger), "2", 5..6),
30 |             (Ok(Token::SingleQuote), "'", 8..9),
31 |         ],
32 |     );
33 | }
34 | 


--------------------------------------------------------------------------------
/tests/tests/css.rs:
--------------------------------------------------------------------------------
 1 | use logos_derive::Logos;
 2 | 
 3 | #[derive(Logos, Debug, Clone, Copy, PartialEq)]
 4 | #[logos(skip r"[ \t\n\f]+")]
 5 | enum Token {
 6 |     #[regex("em|ex|ch|rem|vw|vh|vmin|vmax")]
 7 |     RelativeLength,
 8 | 
 9 |     #[regex("cm|mm|Q|in|pc|pt|px", priority = 3)]
10 |     AbsoluteLength,
11 | 
12 |     #[regex("[+-]?[0-9]*[.]?[0-9]+(?:[eE][+-]?[0-9]+)?", priority = 3)]
13 |     Number,
14 | 
15 |     #[regex("[-a-zA-Z_][a-zA-Z0-9_-]*")]
16 |     Ident,
17 | 
18 |     #[token("{")]
19 |     CurlyBracketOpen,
20 | 
21 |     #[token("}")]
22 |     CurlyBracketClose,
23 | 
24 |     #[token(":")]
25 |     Colon,
26 | }
27 | 
28 | mod css {
29 |     use super::*;
30 |     use tests::assert_lex;
31 | 
32 |     #[test]
33 |     fn test_line_height() {
34 |         assert_lex(
35 |             "h2 { line-height: 3cm }",
36 |             &[
37 |                 (Ok(Token::Ident), "h2", 0..2),
38 |                 (Ok(Token::CurlyBracketOpen), "{", 3..4),
39 |                 (Ok(Token::Ident), "line-height", 5..16),
40 |                 (Ok(Token::Colon), ":", 16..17),
41 |                 (Ok(Token::Number), "3", 18..19),
42 |                 (Ok(Token::AbsoluteLength), "cm", 19..21),
43 |                 (Ok(Token::CurlyBracketClose), "}", 22..23),
44 |             ],
45 |         );
46 |     }
47 | 
48 |     #[test]
49 |     fn test_word_spacing() {
50 |         assert_lex(
51 |             "h3 { word-spacing: 4mm }",
52 |             &[
53 |                 (Ok(Token::Ident), "h3", 0..2),
54 |                 (Ok(Token::CurlyBracketOpen), "{", 3..4),
55 |                 (Ok(Token::Ident), "word-spacing", 5..17),
56 |                 (Ok(Token::Colon), ":", 17..18),
57 |                 (Ok(Token::Number), "4", 19..20),
58 |                 (Ok(Token::AbsoluteLength), "mm", 20..22),
59 |                 (Ok(Token::CurlyBracketClose), "}", 23..24),
60 |             ],
61 |         );
62 |     }
63 | 
64 |     #[test]
65 |     fn test_letter_spacing() {
66 |         assert_lex(
67 |             "h3 { letter-spacing: 42em }",
68 |             &[
69 |                 (Ok(Token::Ident), "h3", 0..2),
70 |                 (Ok(Token::CurlyBracketOpen), "{", 3..4),
71 |                 (Ok(Token::Ident), "letter-spacing", 5..19),
72 |                 (Ok(Token::Colon), ":", 19..20),
73 |                 (Ok(Token::Number), "42", 21..23),
74 |                 (Ok(Token::RelativeLength), "em", 23..25),
75 |                 (Ok(Token::CurlyBracketClose), "}", 26..27),
76 |             ],
77 |         );
78 |     }
79 | }
80 | 


--------------------------------------------------------------------------------
/tests/tests/custom_error.rs:
--------------------------------------------------------------------------------
 1 | use logos_derive::Logos;
 2 | use std::num::{IntErrorKind, ParseIntError};
 3 | use tests::assert_lex;
 4 | 
 5 | #[derive(Debug, Clone, PartialEq, Default)]
 6 | enum LexingError {
 7 |     NumberTooLong,
 8 |     NumberNotEven(u32),
 9 |     #[default]
10 |     Other,
11 | }
12 | 
13 | impl From<ParseIntError> for LexingError {
14 |     fn from(value: ParseIntError) -> Self {
15 |         match value.kind() {
16 |             IntErrorKind::PosOverflow => LexingError::NumberTooLong,
17 |             _ => LexingError::Other,
18 |         }
19 |     }
20 | }
21 | 
22 | fn parse_number(input: &str) -> Result<u32, LexingError> {
23 |     let num = input.parse::<u32>()?;
24 |     if num % 2 == 0 {
25 |         Ok(num)
26 |     } else {
27 |         Err(LexingError::NumberNotEven(num))
28 |     }
29 | }
30 | 
31 | #[derive(Logos, Debug, Clone, Copy, PartialEq)]
32 | #[logos(error = LexingError)]
33 | enum Token<'a> {
34 |     #[regex(r"[0-9]+", |lex| parse_number(lex.slice()))]
35 |     Number(u32),
36 |     #[regex(r"[a-zA-Z_]+")]
37 |     Identifier(&'a str),
38 | }
39 | 
40 | #[test]
41 | fn test() {
42 |     assert_lex(
43 |         "123abc1234xyz1111111111111111111111111111111111111111111111111111111,",
44 |         &[
45 |             (Err(LexingError::NumberNotEven(123)), "123", 0..3),
46 |             (Ok(Token::Identifier("abc")), "abc", 3..6),
47 |             (Ok(Token::Number(1234)), "1234", 6..10),
48 |             (Ok(Token::Identifier("xyz")), "xyz", 10..13),
49 |             (
50 |                 Err(LexingError::NumberTooLong),
51 |                 "1111111111111111111111111111111111111111111111111111111",
52 |                 13..68,
53 |             ),
54 |             (Err(LexingError::Other), ",", 68..69),
55 |         ],
56 |     );
57 | }
58 | 


--------------------------------------------------------------------------------
/tests/tests/lexer_modes.rs:
--------------------------------------------------------------------------------
  1 | use logos::Lexer;
  2 | use logos::Logos as _;
  3 | use logos_derive::Logos;
  4 | 
  5 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
  6 | enum Outer {
  7 |     #[token("\"")]
  8 |     StartString,
  9 | 
 10 |     #[regex(r"\p{White_Space}")]
 11 |     WhiteSpace,
 12 | }
 13 | 
 14 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Logos)]
 15 | enum Inner {
 16 |     #[regex(r#"[^\\"]+"#)]
 17 |     Text,
 18 | 
 19 |     #[token("\\n")]
 20 |     EscapedNewline,
 21 | 
 22 |     #[regex(r"\\u\{[^}]*\}")]
 23 |     EscapedCodepoint,
 24 | 
 25 |     #[regex(r"\\[0-7]{1,3}")]
 26 |     EscapedOctal,
 27 | 
 28 |     #[token(r#"\""#)]
 29 |     EscapedQuote,
 30 | 
 31 |     #[token("\"")]
 32 |     EndString,
 33 | }
 34 | 
 35 | #[test]
 36 | fn main() {
 37 |     let s = r#""Hello W\u{00f4}rld\n""#;
 38 |     let mut outer = Outer::lexer(s);
 39 | 
 40 |     // The outer lexer has picked up the initial quote character
 41 |     assert_eq!(outer.next(), Some(Ok(Outer::StartString)));
 42 | 
 43 |     // We've entered a string, parser creates sublexer
 44 |     let mut inner = outer.morph();
 45 |     assert_eq!(inner.next(), Some(Ok(Inner::Text)));
 46 |     assert_eq!(inner.next(), Some(Ok(Inner::EscapedCodepoint)));
 47 |     assert_eq!(inner.next(), Some(Ok(Inner::Text)));
 48 |     assert_eq!(inner.next(), Some(Ok(Inner::EscapedNewline)));
 49 |     assert_eq!(inner.next(), Some(Ok(Inner::EndString)));
 50 | 
 51 |     // We've exited the string, parser returns to outer lexer
 52 |     outer = inner.morph();
 53 |     assert_eq!(outer.next(), None);
 54 | }
 55 | 
 56 | enum Modes<'source> {
 57 |     Outer(Lexer<'source, Outer>),
 58 |     Inner(Lexer<'source, Inner>),
 59 | }
 60 | 
 61 | impl<'source> Modes<'source> {
 62 |     fn new(s: &'source str) -> Self {
 63 |         Self::Outer(Outer::lexer(s))
 64 |     }
 65 | }
 66 | 
 67 | #[derive(Debug, PartialEq, Eq)]
 68 | enum Tokens {
 69 |     InnerToken(Inner),
 70 |     OuterToken(Outer),
 71 | }
 72 | 
 73 | struct ModeBridge<'source> {
 74 |     mode: Modes<'source>,
 75 | }
 76 | 
 77 | // Clones as we switch between modes
 78 | impl<'source> Iterator for ModeBridge<'source> {
 79 |     type Item = Result<Tokens, ()>;
 80 |     fn next(&mut self) -> Option<Self::Item> {
 81 |         use Tokens::*;
 82 |         match &mut self.mode {
 83 |             Modes::Inner(inner) => {
 84 |                 let result = inner.next();
 85 |                 if Some(Ok(Inner::EndString)) == result {
 86 |                     self.mode = Modes::Outer(inner.to_owned().morph());
 87 |                 }
 88 |                 result.map(|inner| inner.map(InnerToken))
 89 |             }
 90 |             Modes::Outer(outer) => {
 91 |                 let result = outer.next();
 92 |                 if Some(Ok(Outer::StartString)) == result {
 93 |                     self.mode = Modes::Inner(outer.to_owned().morph());
 94 |                 }
 95 |                 result.map(|outer| outer.map(OuterToken))
 96 |             }
 97 |         }
 98 |     }
 99 | }
100 | 
101 | #[test]
102 | fn iterating_modes() {
103 |     use Inner::*;
104 |     use Tokens::*;
105 |     let s = r#""Hello W\u{00f4}\162ld\n""#;
106 |     let moded = ModeBridge {
107 |         mode: Modes::new(s),
108 |     };
109 | 
110 |     let results: Vec<Result<Tokens, ()>> = moded.collect();
111 |     let expect = vec![
112 |         Ok(OuterToken(Outer::StartString)),
113 |         Ok(InnerToken(Text)),
114 |         Ok(InnerToken(EscapedCodepoint)),
115 |         Ok(InnerToken(EscapedOctal)),
116 |         Ok(InnerToken(Text)),
117 |         Ok(InnerToken(EscapedNewline)),
118 |         Ok(InnerToken(EndString)),
119 |     ];
120 |     assert_eq!(results, expect);
121 | }
122 | 


--------------------------------------------------------------------------------
/tests/tests/properties.rs:
--------------------------------------------------------------------------------
 1 | use logos_derive::Logos;
 2 | use tests::assert_lex;
 3 | 
 4 | mod binary;
 5 | mod custom_error;
 6 | 
 7 | #[derive(Logos, Debug, Clone, Copy, PartialEq)]
 8 | #[logos(skip r"[ \t\n\f]+")]
 9 | enum Token {
10 |     #[regex(r"[a-zA-Z]+")]
11 |     Ascii,
12 | 
13 |     #[regex(r"\p{Greek}+")]
14 |     Greek,
15 | 
16 |     #[regex(r"\p{Cyrillic}+")]
17 |     Cyrillic,
18 | }
19 | 
20 | #[test]
21 | fn greek() {
22 |     assert_lex(
23 |         "λόγος can do unicode",
24 |         &[
25 |             (Ok(Token::Greek), "λόγος", 0..10),
26 |             (Ok(Token::Ascii), "can", 11..14),
27 |             (Ok(Token::Ascii), "do", 15..17),
28 |             (Ok(Token::Ascii), "unicode", 18..25),
29 |         ],
30 |     )
31 | }
32 | 
33 | #[test]
34 | fn cyrillic() {
35 |     assert_lex(
36 |         "До свидания",
37 |         &[
38 |             (Ok(Token::Cyrillic), "До", 0..4),
39 |             (Ok(Token::Cyrillic), "свидания", 5..21),
40 |         ],
41 |     )
42 | }
43 | 


--------------------------------------------------------------------------------
/tests/tests/source.rs:
--------------------------------------------------------------------------------
 1 | use std::ops::Range;
 2 | 
 3 | use logos::{Logos as _, Source};
 4 | use logos_derive::Logos;
 5 | 
 6 | struct RefSource<'s, S: ?Sized + Source>(&'s S);
 7 | 
 8 | impl<'s, S: ?Sized + Source> Source for RefSource<'s, S> {
 9 |     type Slice<'a>
10 |         = S::Slice<'a>
11 |     where
12 |         's: 'a;
13 | 
14 |     fn len(&self) -> usize {
15 |         self.0.len()
16 |     }
17 | 
18 |     fn read<'a, Chunk>(&'a self, offset: usize) -> Option<Chunk>
19 |     where
20 |         Chunk: logos::source::Chunk<'a>,
21 |     {
22 |         self.0.read(offset)
23 |     }
24 | 
25 |     #[cfg(not(feature = "forbid_unsafe"))]
26 |     unsafe fn read_byte_unchecked(&self, offset: usize) -> u8 {
27 |         self.0.read_byte_unchecked(offset)
28 |     }
29 | 
30 |     #[cfg(feature = "forbid_unsafe")]
31 |     fn read_byte(&self, offset: usize) -> u8 {
32 |         self.0.read_byte(offset)
33 |     }
34 | 
35 |     fn slice(&self, range: Range<usize>) -> Option<Self::Slice<'_>> {
36 |         self.0.slice(range)
37 |     }
38 | 
39 |     #[cfg(not(feature = "forbid_unsafe"))]
40 |     unsafe fn slice_unchecked(&self, range: Range<usize>) -> Self::Slice<'_> {
41 |         self.0.slice_unchecked(range)
42 |     }
43 | 
44 |     fn is_boundary(&self, index: usize) -> bool {
45 |         self.0.is_boundary(index)
46 |     }
47 | }
48 | 
49 | /// A simple regression test that it is possible to define a custom source.
50 | ///
51 | /// Note that currently parenthesis are required around types with multiple
52 | /// generic arguments.
53 | #[derive(Logos, Debug, Clone, Copy, PartialEq)]
54 | #[logos(source = (RefSource<'s, str>))]
55 | enum Token {
56 |     #[regex(".")]
57 |     Char,
58 | }
59 | 
60 | #[test]
61 | fn custom_source() {
62 |     let source = RefSource("abc");
63 |     let mut lex = Token::lexer(&source);
64 | 
65 |     assert_eq!(lex.next(), Some(Ok(Token::Char)));
66 |     assert_eq!(lex.next(), Some(Ok(Token::Char)));
67 |     assert_eq!(lex.next(), Some(Ok(Token::Char)));
68 |     assert_eq!(lex.next(), None);
69 | }
70 | 


--------------------------------------------------------------------------------
/tests/tests/string.rs:
--------------------------------------------------------------------------------
  1 | use logos::{Lexer, Logos as _};
  2 | use logos_derive::Logos;
  3 | 
  4 | #[derive(Logos, Clone, Debug, PartialEq)]
  5 | #[logos(skip " ")]
  6 | pub enum Token {
  7 |     #[regex(r#""([^"\\]+|\\.)*""#, lex_single_line_string)]
  8 |     String(String),
  9 | }
 10 | 
 11 | #[test]
 12 | fn test_it_works_without_cloning() {
 13 |     let mut lexer = Token::lexer(r#""Hello, world!" "foo😀bar\nbaz \x3F\u{1234}""#);
 14 |     assert_eq!(
 15 |         lexer.next(),
 16 |         Some(Ok(Token::String("Hello, world!".to_string())))
 17 |     );
 18 |     assert_eq!(
 19 |         lexer.next(),
 20 |         Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
 21 |     );
 22 |     assert_eq!(lexer.next(), None);
 23 | }
 24 | 
 25 | #[test]
 26 | fn test_it_works_with_cloning() {
 27 |     let mut lexer = Token::lexer(r#""Hello, world!" "foo😀bar\nbaz \x3F\u{1234}""#);
 28 |     let mut lexer2 = lexer.clone();
 29 |     assert_eq!(
 30 |         lexer2.next(),
 31 |         Some(Ok(Token::String("Hello, world!".to_string())))
 32 |     );
 33 |     let mut lexer3 = lexer2.clone();
 34 |     let mut lexer4 = lexer3.clone();
 35 |     assert_eq!(
 36 |         lexer3.next(),
 37 |         Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
 38 |     );
 39 |     assert_eq!(
 40 |         lexer4.next(),
 41 |         Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
 42 |     );
 43 |     assert_eq!(lexer4.next(), None);
 44 |     let mut lexer5 = lexer.clone();
 45 |     assert_eq!(
 46 |         lexer5.next(),
 47 |         Some(Ok(Token::String("Hello, world!".to_string())))
 48 |     );
 49 |     assert_eq!(
 50 |         lexer5.next(),
 51 |         Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
 52 |     );
 53 |     assert_eq!(
 54 |         lexer.next(),
 55 |         Some(Ok(Token::String("Hello, world!".to_string())))
 56 |     );
 57 |     assert_eq!(lexer5.next(), None);
 58 |     assert_eq!(
 59 |         lexer2.next(),
 60 |         Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
 61 |     );
 62 |     assert_eq!(lexer2.next(), None);
 63 |     assert_eq!(lexer3.next(), None);
 64 |     assert_eq!(
 65 |         lexer.next(),
 66 |         Some(Ok(Token::String("foo😀bar\nbaz \x3F\u{1234}".to_string())))
 67 |     );
 68 |     assert_eq!(lexer.next(), None);
 69 | }
 70 | 
 71 | // Not important
 72 | pub fn lex_single_line_string(lexer: &mut Lexer<Token>) -> Result<String, ()> {
 73 |     let mut string = String::new();
 74 |     let mut chars = lexer.slice()[1..lexer.slice().len() - 1].chars();
 75 |     while let Some(c) = chars.next() {
 76 |         match c {
 77 |             '\\' => {
 78 |                 let c = chars.next().ok_or(())?;
 79 |                 match c {
 80 |                     '\n' => {}
 81 |                     'n' => string.push('\n'),
 82 |                     'r' => string.push('\r'),
 83 |                     't' => string.push('\t'),
 84 |                     '0' => string.push('\0'),
 85 |                     '\'' | '"' | '\\' => string.push(c),
 86 |                     'x' => {
 87 |                         let mut hex = String::new();
 88 |                         hex.push(chars.next().ok_or(())?);
 89 |                         hex.push(chars.next().ok_or(())?);
 90 |                         let code = u8::from_str_radix(&hex, 16).map_err(|_| ())?;
 91 |                         if code > 0x7F {
 92 |                             return Err(());
 93 |                         }
 94 |                         string.push(code as char);
 95 |                     }
 96 |                     'u' => {
 97 |                         if chars.next() != Some('{') {
 98 |                             return Err(());
 99 |                         }
100 |                         let mut hex = String::new();
101 |                         for _ in 0..6 {
102 |                             let c = chars.next().ok_or(())?;
103 |                             if c == '}' {
104 |                                 break;
105 |                             }
106 |                             hex.push(c);
107 |                         }
108 |                         let code = u32::from_str_radix(&hex, 16).map_err(|_| ())?;
109 |                         string.push(char::from_u32(code).ok_or(())?);
110 |                     }
111 |                     _ => return Err(()),
112 |                 }
113 |             }
114 |             _ => string.push(c),
115 |         }
116 |     }
117 |     Ok(string)
118 | }
119 | 


--------------------------------------------------------------------------------
/tests/tests/unicode_dot.rs:
--------------------------------------------------------------------------------
 1 | use logos::Logos as _;
 2 | use logos_derive::Logos;
 3 | 
 4 | #[derive(Logos, Debug, PartialEq)]
 5 | enum TestUnicodeDot {
 6 |     #[regex(".")]
 7 |     Dot,
 8 | }
 9 | 
10 | #[test]
11 | fn test_unicode_dot_str_ascii() {
12 |     let mut lexer = TestUnicodeDot::lexer("a");
13 |     assert_eq!(lexer.next(), Some(Ok(TestUnicodeDot::Dot)));
14 |     assert_eq!(lexer.remainder(), "");
15 |     assert_eq!(lexer.next(), None);
16 | }
17 | 
18 | #[test]
19 | fn test_unicode_dot_str_unicode() {
20 |     let mut lexer = TestUnicodeDot::lexer("");
21 |     assert_eq!(lexer.next(), Some(Ok(TestUnicodeDot::Dot)));
22 |     assert_eq!(lexer.remainder(), "");
23 |     assert_eq!(lexer.next(), None);
24 | }
25 | 
26 | #[derive(Logos, Debug, PartialEq)]
27 | enum TestUnicodeDotBytes {
28 |     #[regex(".", priority = 100)]
29 |     Dot,
30 |     #[regex(b".", priority = 0)]
31 |     InvalidUtf8,
32 | }
33 | 
34 | #[test]
35 | fn test_unicode_dot_bytes_ascii() {
36 |     let mut lexer = TestUnicodeDotBytes::lexer(b"a");
37 |     assert_eq!(lexer.next(), Some(Ok(TestUnicodeDotBytes::Dot)));
38 |     assert_eq!(lexer.remainder(), b"");
39 |     assert_eq!(lexer.next(), None);
40 | }
41 | 
42 | #[test]
43 | fn test_unicode_dot_bytes_unicode() {
44 |     let mut lexer = TestUnicodeDotBytes::lexer("".as_bytes());
45 |     assert_eq!(lexer.next(), Some(Ok(TestUnicodeDotBytes::Dot)));
46 |     assert_eq!(lexer.remainder(), b"");
47 |     assert_eq!(lexer.next(), None);
48 | }
49 | 
50 | #[test]
51 | fn test_unicode_dot_bytes_invalid_utf8() {
52 |     let mut lexer = TestUnicodeDotBytes::lexer(b"\xff");
53 |     assert_eq!(lexer.next(), Some(Ok(TestUnicodeDotBytes::InvalidUtf8)));
54 |     assert_eq!(lexer.remainder(), b"");
55 |     assert_eq!(lexer.next(), None);
56 | }
57 | 


--------------------------------------------------------------------------------