├── .github └── workflows │ ├── release.yaml │ ├── validation-python.yaml │ └── validation-rust.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── Pipfile ├── README-py.md ├── README.md ├── benches ├── findings.md ├── logbench.py └── results │ ├── v0.2.2-27-g87c32f5_2022-12-25_2135.bench │ ├── v0.3.1-12-g0d89be1_2022-12-31_0726.bench │ ├── v0.3.3-3-g1e7d5fd_2023-01-02_0118.bench │ └── v0.4.1-16-g72dc016_2023-10-30_0539.bench ├── book.toml ├── book_src ├── SUMMARY.md ├── installation.md ├── introduction.md └── usage.md ├── clippy.toml ├── dictionaries └── README.md ├── en_US.license ├── release.toml ├── rustfmt.toml ├── test-suite ├── update_dictionaries.py ├── zspell-cli ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── src │ ├── cli │ │ └── mod.rs │ ├── download.rs │ ├── main.rs │ └── spelling │ │ └── mod.rs └── tests │ ├── cli_dict.rs │ ├── cli_lev.rs │ └── files │ ├── de_res.txt │ └── sample-index.json ├── zspell-py ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── docs │ ├── Makefile │ ├── conf.py │ ├── index.rst │ ├── make.bat │ └── requirements.txt ├── pyproject.toml ├── python │ └── zspell │ │ ├── __init__.py │ │ ├── py.types │ │ └── zspell.pyi ├── src │ └── lib.rs └── tests │ └── test_basic.py └── zspell ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── datastructure.rs ├── dict_integration.rs ├── slice_contains.rs ├── small_map.rs └── word_splitter.rs ├── build.rs ├── src ├── affix.rs ├── affix │ ├── node.rs │ ├── parse.rs │ ├── rule.rs │ ├── tests.rs │ ├── tests_parse.rs │ └── types.rs ├── dict.rs ├── dict │ ├── flags.rs │ ├── meta.rs │ ├── parse.rs │ ├── rule.rs │ ├── rules_apply.rs │ ├── rules_reverse.rs │ ├── tests.rs │ ├── tests_parse.rs │ └── tests_rule.rs ├── error.rs ├── helpers.rs ├── lib.rs ├── meta.rs ├── morph.rs ├── suggestions.rs ├── system.rs └── system │ └── tests.rs ├── test-suite ├── 0-example.test ├── 
b-affix-forward-gen-num-flags.test ├── b-affix-forward-gen.test ├── b-flag-long.test ├── b-nosuggest-forbid.test ├── b-stemming-morph.test ├── h-circumfix.test ├── h-ignore-sug.test ├── h-ignore-utf.test ├── h-keepcase.test ├── h-korean.test ├── h-limit-multiple-compounding.test ├── h-map-utf.test ├── h-map.test ├── h-morph.test ├── h-needaffix.test ├── h-needaffix2.test ├── h-needaffix3.test ├── h-needaffix4.test ├── h-needaffix5.test ├── h-nepali.test ├── h-nosuggest.test ├── h-oconv.test ├── h-slash.test ├── h-timelimit.test ├── h-utf8.test ├── h-utfcoumpound.test ├── h-zeroaffix.test ├── i071-number-affixes.test └── i093-separate-dict-afx-flags.test ├── test-util ├── Cargo.toml └── src │ └── lib.rs └── tests ├── files ├── odyssey.txt ├── tortoise_hare_misspelled.txt ├── w1_eng_short.aff └── w1_eng_short.dic └── suite.rs /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Release 3 | 4 | on: 5 | push: 6 | tags: 7 | - 'v*' 8 | 9 | jobs: 10 | # Allow our jobs to block on validation steps 11 | validation_rust: 12 | uses: ./.github/workflows/validation-rust.yaml 13 | 14 | validation_py: 15 | uses: ./.github/workflows/validation-python.yaml 16 | 17 | rust_release: 18 | name: "build & deploy ${{ matrix.build }} binaries" 19 | needs: [validation_rust, validation_py] 20 | runs-on: ${{ matrix.os }} 21 | strategy: 22 | matrix: 23 | include: 24 | - build: linux 25 | os: ubuntu-latest 26 | target: x86_64-unknown-linux-gnu 27 | # target: x86_64-unknown-linux-musl 28 | extension: '' 29 | # Unsuccessful compilation; try on local 30 | # - build: linux-arm 31 | # os: ubuntu-latest 32 | # target: arm-unknown-linux-gnueabihf 33 | # extension: '' 34 | - build: macos 35 | os: macos-latest 36 | target: x86_64-apple-darwin 37 | extension: '' 38 | - build: windows-msvc 39 | os: windows-latest 40 | target: x86_64-pc-windows-msvc 41 | extension: .exe 42 | env: 43 | CARGO: cargo 44 | TARGET_DIR: ./target 45 | 
TARGET_FLAGS: "" 46 | 47 | steps: 48 | # Retrieve git files 49 | - uses: actions/checkout@v4 50 | - uses: dtolnay/rust-toolchain@stable 51 | with: 52 | targets: ${{ matrix.target }} 53 | - uses: Swatinem/rust-cache@v2 54 | # Debugging aid 55 | - name: Show commands 56 | run: | 57 | echo pwd: 58 | pwd 59 | echo ls: 60 | ls 61 | echo "cargo command is: ${{ env.CARGO }}" 62 | echo "target flag is: ${{ env.TARGET_FLAGS }}" 63 | echo "target dir is: ${{ env.TARGET_DIR }}" 64 | # Perform build 65 | - name: Build binary 66 | uses: actions-rs/cargo@v1 67 | with: 68 | command: build 69 | # We only want to build zspell-cli, not plain zspell or zspell-py (does not support cdylib) 70 | args: --package zspell-cli --release --verbose --target ${{ matrix.target }} 71 | - name: Show output 72 | run: | 73 | ls target 74 | ls "target/${{ matrix.target }}" 75 | ls "target/${{ matrix.target }}/release" 76 | # Create .zip or .tar.gz file 77 | - name: Build archive 78 | shell: bash 79 | run: | 80 | echo '\nWorking directory:' && pwd 81 | echo '\nls:' && ls 82 | 83 | # outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")" 84 | # outdir=target/release/${{ steps.get_repository_name.outputs.REPOSITORY_NAME }}${{ matrix.extension }} 85 | # Find the output directory with the latest timestamp 86 | cargo_outdir="$(find "${{ env.TARGET_DIR }}" -name zspell-stamp -print0 | xargs -0 ls -t | head -n1 | xargs dirname)" 87 | ref_name=${GITHUB_REF##*/} 88 | 89 | # Remove leading 'v' for use where needed 90 | ref_name_stripped=$(echo $ref_name | perl -0pe 's/^v//') 91 | echo "\nRef name: \"$ref_name\"" 92 | echo "\nRef name stripped: \"$ref_name_stripped\"" 93 | 94 | staging="zspell-$ref_name-${{ matrix.target }}" 95 | mkdir -p "$staging"/{completion,doc} 96 | 97 | # Remove the "unreleased" section from our changelog 98 | perl -0777 -i -pe "s/(<\!-- next-header -->.*## \[Unreleased\].*?\n)(?=## |<\!--)//gms" CHANGELOG.md 99 | 100 | cp {README.md,LICENSE} "$staging/" 101 | cp CHANGELOG.md "$staging/doc/"
102 | cp "$cargo_outdir"/zspell.1 "$staging/doc" 103 | cp "$cargo_outdir"/{_zspell,_zspell.ps1,zspell.bash,zspell.elv,zspell.fish} "$staging/completion" 104 | 105 | # Build RNOTES.md, which we will use for our Github release (not shipped in zip) 106 | # Select the release notes from our latest version only 107 | perl -0777 -ne "print /(## \[$ref_name_stripped\].*?\n)(?=\n*^(?:## |<\!--))/gms" CHANGELOG.md > RNOTES.md 108 | # Select the diff URL for this version only 109 | perl -0777 -ne "print /\n\[$ref_name_stripped\]:.*?\n/gms" CHANGELOG.md >> RNOTES.md 110 | 111 | echo "Release notes:" && cat RNOTES.md 112 | 113 | if [ "${{ matrix.os }}" = "windows-latest" ]; then 114 | cp "target/${{ matrix.target }}/release/zspell.exe" "$staging/" 115 | 7z a "$staging.zip" "$staging" 116 | echo "ASSET=$staging.zip" >> $GITHUB_ENV 117 | else 118 | cp "target/${{ matrix.target }}/release/zspell" "$staging/" 119 | tar czf "$staging.tar.gz" "$staging" 120 | echo "ASSET=$staging.tar.gz" >> $GITHUB_ENV 121 | fi 122 | # Upload to github 123 | - name: Release 124 | uses: softprops/action-gh-release@v1 125 | if: startsWith(github.ref, 'refs/tags/') 126 | env: 127 | GITHUB_REPOSITORY: pluots/zspell 128 | with: 129 | body_path: RNOTES.md 130 | # note you'll typically need to create a personal access token 131 | # with permissions to create releases in the other repo 132 | token: ${{ secrets.GITHUB_TOKEN }} 133 | files: | 134 | ${{ env.ASSET }} 135 | 136 | linux_wheels: 137 | runs-on: ubuntu-latest 138 | needs: [validation_rust, validation_py] 139 | steps: 140 | - uses: actions/checkout@v4 141 | - name: build libc wheels 142 | uses: messense/maturin-action@v1 143 | with: 144 | manylinux: auto 145 | command: build 146 | # container default is manylinux 147 | args: --release -o dist -i 3.7 3.8 3.9 3.10 3.11 3.12 --manifest-path zspell-py/Cargo.toml 148 | - name: build musl wheels 149 | uses: messense/maturin-action@v1 150 | with: 151 | target: x86_64-unknown-linux-musl 152 | manylinux: 
musllinux_1_1 153 | command: build 154 | args: --release -o dist -i 3.7 3.8 3.9 3.10 3.11 3.12 --manifest-path zspell-py/Cargo.toml 155 | - name: upload wheels 156 | uses: actions/upload-artifact@v2 157 | with: 158 | name: wheels 159 | path: dist 160 | 161 | windows_wheels: 162 | runs-on: windows-latest 163 | needs: [validation_rust, validation_py] 164 | steps: 165 | - uses: actions/checkout@v4 166 | - uses: messense/maturin-action@v1 167 | with: 168 | command: build 169 | # FIXME: python 3.12 not yet available on windows runners 170 | args: --release -o dist -i 3.7 3.8 3.9 3.10 3.11 --manifest-path zspell-py/Cargo.toml 171 | - name: upload wheels 172 | uses: actions/upload-artifact@v2 173 | with: 174 | name: wheels 175 | path: dist 176 | 177 | macos_wheels: 178 | runs-on: macos-latest 179 | needs: [validation_rust, validation_py] 180 | steps: 181 | - uses: actions/checkout@v4 182 | - uses: messense/maturin-action@v1 183 | with: 184 | command: build 185 | args: --release -o dist -i 3.7 3.8 3.9 3.10 3.11 3.12 --universal2 --manifest-path zspell-py/Cargo.toml 186 | - name: upload wheels 187 | uses: actions/upload-artifact@v2 188 | with: 189 | name: wheels 190 | path: dist 191 | 192 | release_all_wheels: 193 | name: Release wheels 194 | runs-on: ubuntu-latest 195 | needs: [linux_wheels, macos_wheels, windows_wheels] 196 | steps: 197 | - uses: actions/download-artifact@v2 198 | with: 199 | name: wheels 200 | - name: Publish to PyPI 201 | uses: messense/maturin-action@v1 202 | env: 203 | MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} 204 | with: 205 | command: upload 206 | args: --skip-existing * 207 | 208 | deploy_book: 209 | runs-on: ubuntu-latest 210 | needs: [validation_rust, validation_py] 211 | steps: 212 | - uses: actions/checkout@v4 213 | with: 214 | fetch-depth: 0 215 | - name: Install mdbook 216 | run: | 217 | mkdir mdbook 218 | curl -sSL https://github.com/rust-lang/mdBook/releases/download/v0.4.14/mdbook-v0.4.14-x86_64-unknown-linux-gnu.tar.gz \ 219 | | 
tar -xz --directory=./mdbook 220 | echo `pwd`/mdbook >> $GITHUB_PATH 221 | - name: Deploy GitHub Pages 222 | run: | 223 | # This assumes your book is in the root of your repository. 224 | # Just add a `cd` here if you need to change to another directory. 225 | mdbook build 226 | git worktree add gh-pages 227 | git config user.name "Deploy from CI" 228 | git config user.email "" 229 | cd gh-pages 230 | # Delete the ref to avoid keeping history. 231 | git update-ref -d refs/heads/gh-pages 232 | rm -rf * 233 | mv ../book/* . 234 | git add . 235 | git commit -m "Deploy $GITHUB_SHA to gh-pages" 236 | git push --force --set-upstream origin gh-pages 237 | -------------------------------------------------------------------------------- /.github/workflows/validation-python.yaml: -------------------------------------------------------------------------------- 1 | name: Python Validation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | workflow_call: # allow this to be run from other workflows 9 | 10 | jobs: 11 | verify: 12 | name: pytest 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: actions/cache@v3 17 | with: 18 | path: ~/.local/share/virtualenvs 19 | key: ${{ runner.os }}-pipenv-${{ hashFiles('**/Pipfile.lock') }} 20 | - uses: dtolnay/rust-toolchain@stable 21 | - uses: Swatinem/rust-cache@v2 22 | - uses: actions/setup-python@v4 23 | with: 24 | python-version: '3.11' 25 | - name: Install pipenv 26 | run: python -m pip install --upgrade pipenv wheel 27 | - name: Install dependencies 28 | run: | 29 | pipenv install --dev 30 | pipenv run maturin develop --manifest-path zspell-py/Cargo.toml 31 | - name: Pytest 32 | run: pipenv run pytest 33 | - name: Validate docs 34 | run: pipenv run make -C zspell-py/docs html 35 | -------------------------------------------------------------------------------- /.github/workflows/validation-rust.yaml: -------------------------------------------------------------------------------- 1 
| --- 2 | name: Rust Validation 3 | 4 | on: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | workflow_call: # allow this to be run from other workflows 10 | 11 | env: 12 | RUSTDOCFLAGS: -D warnings 13 | RUSTFLAGS: -D warnings 14 | RUST_BACKTRACE: 1 15 | CARGO_UNSTABLE_SPARSE_REGISTRY: true 16 | 17 | jobs: 18 | clippy: 19 | name: clippy 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: dtolnay/rust-toolchain@beta 24 | with: 25 | components: clippy 26 | - uses: Swatinem/rust-cache@v2 27 | - run: cargo clippy --all-features --all-targets -- -D warnings 28 | - run: cargo clippy --no-default-features --all-targets --features unstable-bench -- -D warnings 29 | 30 | min_versions: 31 | name: check minimum rustc version 32 | runs-on: ubuntu-latest 33 | env: 34 | # we don't care about unused with these feature configurations 35 | RUSTFLAGS: -A unused 36 | steps: 37 | - uses: actions/checkout@v4 38 | - uses: dtolnay/rust-toolchain@1.65 39 | - uses: Swatinem/rust-cache@v2 40 | # check only zspell; we can use later for the CLI 41 | - run: cargo check -p zspell --all-features 42 | - run: cargo check -p zspell --no-default-features --features unstable-bench 43 | 44 | test: 45 | strategy: 46 | fail-fast: true 47 | matrix: 48 | os: [ubuntu-latest, windows-latest, macos-latest] 49 | include: 50 | - os: ubuntu-latest 51 | name: linux 52 | coverage: true 53 | - os: windows-latest 54 | name: windows 55 | - os: macos-latest 56 | name: mac 57 | name: "test on ${{ matrix.name }}" 58 | runs-on: ${{ matrix.os }} 59 | steps: 60 | - uses: actions/checkout@v4 61 | - name: List files 62 | run: | 63 | pwd 64 | ls 65 | - uses: dtolnay/rust-toolchain@nightly 66 | with: 67 | components: ${{ matrix.coverage && 'llvm-tools-preview' || '' }} 68 | - uses: taiki-e/install-action@cargo-llvm-cov 69 | - name: Install nextest 70 | uses: taiki-e/install-action@nextest 71 | - uses: Swatinem/rust-cache@v2 72 | with: 73 | key: ${{ matrix.os }} 74 | - name: run tests 
without coverage 75 | if: ${{ ! matrix.coverage }} 76 | run: | 77 | cargo nextest run 78 | cargo test --doc 79 | - name: run tests with coverage 80 | if: ${{ matrix.coverage }} 81 | # nextest can't run doctests so we run coverage on those separately 82 | # and do a combined report 83 | run: | 84 | cargo llvm-cov --no-report nextest 85 | cargo llvm-cov --no-report --doc 86 | cargo llvm-cov report --doctests --lcov --output-path lcov.info 87 | - name: Upload coverage data to codecov 88 | if: ${{ matrix.coverage }} 89 | uses: codecov/codecov-action@v3 90 | env: 91 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 92 | with: 93 | fail_ci_if_error: false 94 | files: lcov.info 95 | 96 | sanitizers: 97 | name: Test with leak sanitizer 98 | runs-on: ubuntu-latest 99 | env: 100 | RUSTFLAGS: -Zsanitizer=leak 101 | steps: 102 | - uses: actions/checkout@v4 103 | - uses: dtolnay/rust-toolchain@nightly 104 | with: 105 | components: rust-src 106 | - name: Install nextest 107 | uses: taiki-e/install-action@nextest 108 | - uses: Swatinem/rust-cache@v2 109 | - run: > 110 | cargo nextest run -p zspell 111 | --target=x86_64-unknown-linux-gnu 112 | -Zbuild-std 113 | - run: > 114 | cargo test -p zspell --doc 115 | --target=x86_64-unknown-linux-gnu 116 | -Zbuild-std 117 | 118 | miri: 119 | name: Miri 120 | runs-on: ubuntu-latest 121 | # basically only run this if we're sure we don't cancel since it eats so much CPU 122 | needs: ["clippy", "test", "fmt", "doc"] 123 | env: 124 | # Can't interact with files in isolation 125 | MIRIFLAGS: -Zmiri-disable-isolation 126 | steps: 127 | - uses: actions/checkout@v4 128 | - uses: dtolnay/rust-toolchain@nightly 129 | with: 130 | components: miri 131 | - name: Install nextest 132 | uses: taiki-e/install-action@nextest 133 | - uses: Swatinem/rust-cache@v2 134 | - name: Run Miri 135 | # Miri is _slow_ for our use case, only run a few comprehensive tests 136 | run: > 137 | cargo miri nextest run -p zspell -E ' 138 | test(=test_stemming_morph) + 139 | 
test(=test_pfxsfx) 140 | ' 141 | 142 | fmt: 143 | name: formatting 144 | runs-on: ubuntu-latest 145 | steps: 146 | - uses: actions/checkout@v4 147 | - uses: dtolnay/rust-toolchain@nightly 148 | with: 149 | components: rustfmt 150 | - uses: Swatinem/rust-cache@v2 151 | - run: cargo fmt --all -- --check 152 | - uses: actions/setup-python@v3 153 | - name: Validate pre-commit 154 | uses: pre-commit/action@v3.0.0 155 | 156 | doc: 157 | name: docs 158 | runs-on: ubuntu-latest 159 | steps: 160 | - uses: actions/checkout@v4 161 | - uses: dtolnay/rust-toolchain@nightly 162 | - uses: Swatinem/rust-cache@v2 163 | - run: cargo doc 164 | 165 | book: 166 | name: book 167 | runs-on: ubuntu-latest 168 | steps: 169 | - uses: actions/checkout@v4 170 | - uses: dtolnay/rust-toolchain@nightly 171 | - uses: Swatinem/rust-cache@v2 172 | - run: | 173 | mkdir -p ~/mdbook 174 | # Tar is weird with ~ as home 175 | curl -sSL https://github.com/rust-lang/mdBook/releases/download/v0.4.21/mdbook-v0.4.21-x86_64-unknown-linux-gnu.tar.gz \ 176 | | tar -xz --directory=$(echo ~)/mdbook 177 | - run: ~/mdbook/mdbook test 178 | 179 | # Make sure we turned the clippy lint off 180 | verify_fixme_critical: 181 | name: verify critical fixmes 182 | runs-on: ubuntu-latest 183 | steps: 184 | - uses: actions/checkout@v4 185 | - run: grep -r "FIXME:CRIT" --exclude-dir="target" --exclude-dir=".git" --exclude="validation-rust.yaml" && exit 1 || exit 0 186 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | dictionaries/*.dic 3 | dictionaries/*.aff 4 | dictionaries/*.license 5 | dictionaries/scowl* 6 | tests/files/odyssey.txt 7 | /book 8 | .docker-cargo 9 | 10 | Pipfile.lock 11 | 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | .pytest_cache/ 15 | *.py[cod] 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | .venv/ 23 | 
env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | include/ 34 | man/ 35 | venv/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | pip-selfcheck.json 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | 53 | # Translations 54 | *.mo 55 | 56 | # Mr Developer 57 | .mr.developer.cfg 58 | .project 59 | .pydevproject 60 | 61 | # Rope 62 | .ropeproject 63 | 64 | # Django stuff: 65 | *.log 66 | *.pot 67 | 68 | .DS_Store 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyCharm 74 | .idea/ 75 | 76 | # VSCode 77 | .vscode/ 78 | 79 | # Pyenv 80 | **/docs/_build 81 | .python-version 82 | 83 | # Profiling 84 | perf.data* 85 | callgrind.out* 86 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-yaml 6 | - id: check-toml 7 | - id: fix-byte-order-marker 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | - id: mixed-line-ending 11 | - id: check-added-large-files 12 | args: ['--maxkb=600'] 13 | 14 | - repo: https://github.com/psf/black 15 | rev: 23.9.1 16 | hooks: 17 | - id: black 18 | 19 | - repo: local 20 | hooks: 21 | - id: cargo-fmt 22 | name: Cargo format 23 | language: system 24 | entry: cargo fmt 25 | args: ["--"] 26 | types_or: ["rust"] 27 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # 
Required 6 | version: 2 7 | 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | # Use the latest stable 12 | python: "3.11" 13 | rust: "1.61" 14 | 15 | python: 16 | install: 17 | - requirements: zspell-py/docs/requirements.txt 18 | - method: pip 19 | path: zspell-py/ 20 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | 4 | 5 | ## [Unreleased] - ReleaseDate 6 | 7 | ### Added 8 | 9 | ### Changed 10 | 11 | ### Removed 12 | 13 | 14 | 15 | ## [0.5.5] - 2024-06-13 16 | 17 | ### Changed 18 | 19 | - Fix the parsing of long (double ASCII) flags in 20 | [#109](https://github.com/pluots/zspell/pull/109) and 21 | [#111](https://github.com/pluots/zspell/pull/111). 22 | 23 | 24 | ## [0.5.3] - 2023-12-13 25 | 26 | ### Changed 27 | 28 | - Fix duplicate flag issue loading the German dictionary from 29 | [#93](https://github.com/pluots/zspell/issues/93) 30 | 31 | 32 | ## [0.5.2] - 2023-10-30 33 | 34 | ### Added 35 | 36 | Add a `zspell::builder()` shortcut for `DictBuilder::new()`. 37 | 38 | 39 | ## [0.5.1] - 2023-10-30 40 | 41 | ### Additions 42 | 43 | Publish the work in progress command line interface under crate name 44 | `zspell-cli`. 45 | 46 | ### Changed 47 | 48 | - Add support for nonstandard morphological information types. 49 | - Fix issue where all morph information for an affix rule were being reported, 50 | rather than just that for the relevant pattern. Fixes 51 | [#73](https://github.com/pluots/zspell/issues/73). 52 | - Rewrite the `.dic` file parser. 53 | - Improve handling of morphological information in dictionary files. 54 | 55 | ## [0.5.0] - 2023-10-30 56 | 57 | Immediately superseded release, see 0.5.1. 58 | 59 | ## [0.4.1] - 2023-10-18 60 | 61 | ### Additions 62 | 63 | - Add `Dictionary::{entry, entries}` which allow stemming and morphological 64 | analysis.
65 | 66 | ### Fixed 67 | 68 | - Corrected error message for parsing affix headers 69 | - Corrected pattern matching for groups that include hyphens 70 | - Default no longer has a nosuggest flag 71 | - Change output directory to use cargo directory rather than source 72 | 73 | ### Changed 74 | 75 | - Remove features `unstable-analysis` and `unstable-stem` since functionality is 76 | now public 77 | - Wordlist now correctly applies more than one affix rule if it is available 78 | - Moved `DictBuilder::config` behind `zspell-unstable` 79 | - [build] update CI workflows 80 | - [internal] make some changes from `TryFrom` to `FromStr` 81 | - [internal] refactor test system to support stemming and morphological analysis 82 | 83 | ## [0.4.0] - 2023-10-18 84 | 85 | Immediately superseded release, see 0.4.1. 86 | 87 | ## [0.3.3] - 2023-01-01 88 | 89 | ### Changed 90 | 91 | - [build] update python release workflow 92 | 93 | ## [0.3.2] - 2023-01-01 94 | 95 | ### Changed 96 | 97 | - `.dic` parser now ignores lines that start with a tab (sometimes used for 98 | comments) 99 | - Updated python documentation 100 | 101 | ## [0.3.1] - 2022-12-30 102 | 103 | Minor patch to build system workflow 104 | 105 | ## [0.3.0] - 2022-12-30 106 | 107 | This change is a huge rewrite of the library! Hopefully this will pave the way 108 | forward for more features and easier growth. 109 | 110 | ### Changes 111 | 112 | - Added `DictBuilder` to simplify dictionary creation 113 | - Removed `affix::Config` as the representation was limiting & clunky 114 | - The methods on `Dictionary` are now infallible since an uncompiled dictionary 115 | can no longer be created 116 | - Rewrote the `error` module 117 | - Simplified imports, everything needed is now top-level 118 | - Rewrote affix file parser so it is much more efficient and now handles all 119 | known keys. We do not yet act on all possible values.
120 | - Rewrote the dictionary & personal wordlist parsers 121 | 122 | ### Additions 123 | 124 | - `check_indices` is now available to return better information about the 125 | location of errors 126 | - Python modules now have correct bindings (hooray!) 127 | 128 | There are also a few new APIs that are feature gated. They should be considered 129 | very unstable until those feature gates are removed. 130 | 131 | - Suggestions 132 | - Stemming 133 | - Morphological analysis 134 | - System tools. These were previously public but have been moved behind the 135 | feature gate. 136 | 137 | ## [0.2.2] - 2022-11-04 138 | 139 | Minor bumps in the dependency list 140 | 141 | ## [0.2.1] - 2022-11-04 142 | 143 | ### Changes 144 | 145 | - Changed word breaking to use unicode segmentation, as suggested by 146 | @saona-raimundo 147 | 148 | ## [0.2.0] - 2022-11-04 149 | 150 | ### Additions 151 | 152 | - Ability to automatically locate dictionaries on the system, WIP and not yet 153 | documented 154 | - Command line option to download dictionaries 155 | 156 | ### Changes 157 | 158 | - Rename helper CLI and py crates (only relevant within this project) 159 | 160 | ## [0.1.4] - 2022-08-17 161 | 162 | ### Additions 163 | 164 | - Started generating manpages and autocomplete scripts on build 165 | - Started generating a documentation book for the CLI 166 | 167 | ### Changes 168 | 169 | - Better reserve & shrink vectors and hash sets to save a small amount of 170 | overhead 171 | 172 | ## [0.1.3] - 2022-08-16 173 | 174 | ### Changes 175 | 176 | - Correction to output generation 177 | 178 | ## [0.1.2] - 2022-08-16 179 | 180 | ### Additions 181 | 182 | - Framework for locating files on a user's local machine 183 | 184 | ### Changes 185 | 186 | - Updated binary output configuration 187 | 188 | ## [0.1.1] - 2022-07-25 189 | 190 | ### Changes 191 | 192 | - Updated wheel release configuration 193 | 194 | ## [0.1.0] - 2022-07-25 195 | 196 | ### Changes 197 | 198 | - Restructured project to
make all modules public that might be needed to 199 | interface with this library. 200 | - Restructuring to use `` for all functions that may error 201 | - Behind the scenes work to prepare for automatic dictionary location 202 | 203 | 204 | [Unreleased]: https://github.com/pluots/zspell/compare/v0.5.5...HEAD 205 | [0.5.5]: https://github.com/pluots/zspell/compare/v0.5.3...v0.5.5 206 | [0.5.3]: https://github.com/pluots/zspell/compare/v0.5.2...v0.5.3 207 | [0.5.2]: https://github.com/pluots/zspell/compare/v0.5.1...v0.5.2 208 | [0.5.1]: https://github.com/pluots/zspell/compare/v0.5.0...v0.5.1 209 | [0.5.0]: https://github.com/pluots/zspell/compare/v0.4.1...v0.5.0 210 | [0.4.1]: https://github.com/pluots/zspell/compare/v0.4.0...v0.4.1 211 | [0.4.0]: https://github.com/pluots/zspell/compare/v0.3.3...v0.4.0 212 | [0.3.3]: https://github.com/pluots/zspell/compare/v0.3.2...v0.3.3 213 | [0.3.2]: https://github.com/pluots/zspell/compare/v0.3.1...v0.3.2 214 | [0.3.1]: https://github.com/pluots/zspell/compare/v0.3.0...v0.3.1 215 | [0.3.0]: https://github.com/pluots/zspell/compare/v0.2.2...v0.3.0 216 | [0.2.2]: https://github.com/pluots/zspell/compare/v0.2.1...v0.2.2 217 | [0.2.1]: https://github.com/pluots/zspell/compare/v0.2.0...v0.2.1 218 | [0.2.0]: https://github.com/pluots/zspell/compare/v0.1.4...v0.2.0 219 | [0.1.4]: https://github.com/pluots/zspell/compare/v0.1.3...v0.1.4 220 | [0.1.3]: https://github.com/pluots/zspell/compare/v0.1.2...v0.1.3 221 | [0.1.2]: https://github.com/pluots/zspell/compare/v0.1.1...v0.1.2 222 | [0.1.1]: https://github.com/pluots/zspell/compare/v0.1.0...v0.1.1 223 | [0.1.0]: https://github.com/pluots/zspell/compare/v0.0.1...v0.1.0 224 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | members = [ 4 | "zspell", 5 | "zspell-py", 6 | "zspell-cli", 7 | "zspell/test-util", 8 | ] 9 | 10 | 
default-members = [ 11 | "zspell", 12 | "zspell-cli", 13 | ] 14 | 15 | # Build with `cargo build --profile=release-debug` 16 | # Alternatively `cargo bench --profile=release-debug` 17 | # Useful for profiling, not for official releases 18 | [profile.release-debug] 19 | # inherits = "release" 20 | inherits = "dev" 21 | opt-level = 3 22 | debug = true 23 | strip = "none" 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2022 Trevor Gross 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | maturin = "1.3.0" 8 | sphinx = "7.2.6" 9 | furo = "2023.9.10" 10 | 11 | [dev-packages] 12 | black = "23.9.1" 13 | pytest = "7.4.2" 14 | m2r = "0.3.1" 15 | 16 | [requires] 17 | python_version = "3.11" 18 | -------------------------------------------------------------------------------- /README-py.md: -------------------------------------------------------------------------------- 1 | # ZSpell 2 | 3 | Python bindings for the Rust zspell library: a simple yet fast spellchecker. 
4 | 5 | To use this library, you will need a dictionary in the language of your choice. 6 | Many are available at the following repository: 7 | 8 | 9 | The full Python API documentation is available at 10 | 11 | ```py 12 | from zspell import Dictionary 13 | 14 | with open ("dictionaries/en_US.aff", "r") as f: 15 | config_str = f.read() 16 | with open ("dictionaries/en_US.dic", "r") as f: 17 | dict_str = f.read() 18 | d = Dictionary(config_str, dict_str) 19 | 20 | assert(d.check("Apples are good! Don't you think?")) 21 | assert(not d.check("Apples are baaaad")) 22 | ``` 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ZSpell 2 | 3 | This project is a spellchecker written completely in Rust, that maintains 4 | compatibility with the venerable Hunspell dictionary format. It is entirely 5 | native and does not rely on any other backends (Enchant, Hunspell, Aspell, 6 | etc.). This library also has the goal of being usable via WASM. Full Unicode 7 | support is baked in. 8 | 9 | The library side has a stabilized checker, but the suggestion API is not yet 10 | finalized. The CLI is usable but not yet considered stabilized. See 11 | [Feature Status](#feature-status) for more information on what is available. 12 | 13 | Here are some useful quick links: 14 | 15 | - Crate info: 16 | - Crate CLI docs (incomplete): 17 | - Crate library docs: 18 | - Python library page: 19 | - Crate source: 20 | 21 | ## Interfaces 22 | 23 | This project exposes multiple interfaces to its spellchecker, listed in this 24 | section. 25 | 26 | ### Command Line Interface 27 | 28 | Just want to use this spellchecker from the command line? Check out the book, 29 | located here , for a more in-depth explanation 30 | of installation and usage. 31 | 32 | If you don't want to read further, the easiest way to get started is to download 33 | a prebuilt binary from here: .
34 | 35 | ### Rust Library Interface 36 | 37 | This project also aims to create a fully functional spellchecking library, for 38 | easy programmatic use. See the documentation for the library side here 39 | . This also includes a lot of design methodology 40 | discussions, for those who are interested. 41 | 42 | ### Python Interface 43 | 44 | There is a python wrapper for this library with prebuilt wheels, available here: 45 | . Its source is located in the 46 | [zspell-py crate](zspell-py). 47 | 48 | ### Usage via WASM 49 | 50 | The library API should work out of the box. Official WASM bindings will be added 51 | at some point. 52 | 53 | ## Feature Status 54 | 55 | | Feature | Available via Library | Available via CLI | Tracking Issue | 56 | | ------------------------------ | --------------------- | ----------------- | ------------------------------------------------- | 57 | | Basic spellcheck functionality | ✓ | ✓ | | 58 | | Forbidden word handling | ✓ | ✓ | [#17](https://github.com/pluots/zspell/issues/17) | 59 | | Stemming | ✓ | ✓ | | 60 | | Morph analysis | ✓ | ✓ | | 61 | | Suggestions | WIP | ✕ | [#16](https://github.com/pluots/zspell/issues/16) | 62 | | Compound word handling | ✕ | ✕ | | 63 | | Full Morph/Phone Handling | WIP | ✕ | | 64 | | Python Interface | Beta | N/A | [#18](https://github.com/pluots/zspell/issues/18) | 65 | | Prebuilt WASM bindings | ✕ | N/A | [#19](https://github.com/pluots/zspell/issues/19) | 66 | 67 | ## Performance 68 | 69 | This repository has the goal of highly prioritizing the most expected usage, 70 | i.e., that most words to be checked are correct. With optimizations based around 71 | this concept and with the modern computers now able to store entire compiled 72 | word lists in memory (~20 MiB), `zspell` tends to outperform other 73 | spellcheckers. 74 | 75 | ## MSRV 76 | 77 | This library relies on features from Rust 1.65, so that is our current minimum 78 | supported version. 
Our CI validates this for the library and examples. 79 | 80 | The CLI and test runner require newer features and do not keep a specific MSRV. 81 | 82 | ## Test suite 83 | 84 | This project keeps a test suite located in `zspell/test-suite` (symlinked to 85 | `test-suite`). Each file has a simple format that combines a simple affix and 86 | dictionary file. To add a test, just duplicate and edit `0-example.test`. 87 | 88 | File names are as follows: 89 | 90 | - `0-*`: meta tests that do not get run 91 | - `b-*`: basic functionality tests 92 | - `h-*`: tests that come from the Hunspell test suite 93 | - `i000-*`: tests that address specific issues 94 | 95 | ## License 96 | 97 | See the LICENSE file for license information. The provided license does allow 98 | for proprietary use and adaptation; that being said, I kindly suggest that if 99 | you come up with an improvement, you submit a pull request and help us all out 100 | :) 101 | 102 | ### Test suite license 103 | 104 | Some tests are taken from Hunspell's test suite. Hunspell has various licenses, 105 | we select MPL and include a SPDX notice on relevant files. 106 | 107 | ### Dictionary data license 108 | 109 | The dictionaries provided in this repository for testing purposed have been 110 | obtained under license. These files have been sourced from here: 111 | [https://github.com/wooorm/dictionaries](https://github.com/wooorm/dictionaries) 112 | 113 | These dictionaries are licensed under various licenses, different from that of 114 | this project. Please see the applicable `.license` file withing the 115 | `dictionaries/` directory. 
116 | -------------------------------------------------------------------------------- /benches/findings.md: -------------------------------------------------------------------------------- 1 | # Benchmark Findings 2 | 3 | Simple notes from benchmarks that have been run 4 | 5 | ## Collection types 6 | 7 | Four collections were compared; `Vec` (as a baseline), 8 | `std::collections::BTreeSet`, `std::collections::HashSet`, and 9 | `hashbrown::HashSet`. These were each tested on `.contains()` with values that 10 | did and did not exist, as well as 11 | 12 | ``` 13 | Vec BTreeSet std HashSet hashbrown HashSet 14 | contains 594 us 2.17 us 530 ns 279 ns 15 | not contains 1.91 us 2.40 us 436 ns 160 ns 16 | collect 18.3 us 301 us 204 us 120 us 17 | ``` 18 | 19 | The `HashSet` implementations significantly beat out other alternatives, and the 20 | `hashbrown` implementation outperformed `std::HashSet`. This is expected because 21 | `hashbrown` uses a faster hash that is not cryptographically secure (not a 22 | problem for our applications). 23 | 24 | For some reason, the improvements going from `std` to `hashbrown` don't really 25 | seem to show up for the dictionary integration tests. This will take some 26 | looking into. 27 | 28 | ## Slice `contains` vs. `binary_search` 29 | 30 | Overall, the price of sorting doesn't seem to have any payoff, especially for 31 | our use cases of short arrays. If it is already sorted then we can save time, 32 | about 20% on average. 33 | -------------------------------------------------------------------------------- /benches/logbench.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Run `cargo bench`, print the output with CPU information to a timestamped 3 | file. 4 | 5 | Does not work on Windows (WSL works). 
6 | """ 7 | 8 | 9 | import platform 10 | import subprocess as sp 11 | import sys 12 | import time 13 | from datetime import datetime 14 | from inspect import cleandoc 15 | from pathlib import Path 16 | 17 | 18 | def decode_sp_out(b: bytes) -> str: 19 | return b.decode(errors="ignore").strip() 20 | 21 | 22 | def get_dtime() -> str: 23 | return datetime.utcnow().strftime(r"%Y-%m-%d_%H%M") 24 | 25 | 26 | def git_describe() -> str: 27 | args = ["git", "describe", "--always", "--tags"] 28 | return decode_sp_out(sp.check_output(args)) 29 | 30 | 31 | def get_fpath(dtime: str, describe: str) -> tuple[str, Path]: 32 | fname = f"{describe}_{dtime}.bench" 33 | fpath = Path(__file__).resolve().parents[0] / "results" / fname 34 | return (fname, fpath) 35 | 36 | 37 | def rustc_version() -> str: 38 | return decode_sp_out(sp.check_output(["rustc", "--version"])) 39 | 40 | 41 | def get_cpu_info() -> str: 42 | s = "" 43 | if platform.system() == "Darwin": 44 | cmd = ["sysctl", "-n", "machdep.cpu.brand_string"] 45 | s += decode_sp_out(sp.check_output(cmd)) 46 | else: 47 | tmp = decode_sp_out(sp.check_output("lscpu")) 48 | for line in tmp.splitlines(): 49 | if ( 50 | "Architecture" in line 51 | or "Model name" in line 52 | or "Socket" in line 53 | or "Thread" in line 54 | or "CPU(s)" in line 55 | or "MHz" in line 56 | ): 57 | s += line 58 | return s 59 | 60 | 61 | def main(): 62 | start_time = time.time() 63 | dtime = get_dtime() 64 | describe = git_describe() 65 | fname, fpath = get_fpath(dtime, describe) 66 | version = rustc_version() 67 | cpu_info = get_cpu_info() 68 | cmd = ["cargo", "bench", "--features", "unstable-bench"] 69 | cmd += sys.argv[1:] 70 | 71 | header_str = ( 72 | cleandoc( 73 | f""" 74 | {fname} 75 | 76 | Benchmark from {dtime} on commit {describe} 77 | {version} 78 | 79 | CPU Information: 80 | {cpu_info} 81 | 82 | Running: '{" ".join(cmd)}' 83 | """ 84 | ) 85 | + "\n\n\n" 86 | ) 87 | 88 | print(header_str) 89 | output = header_str 90 | 91 | with sp.Popen(cmd, 
stdout=sp.PIPE, bufsize=1, universal_newlines=True) as p: 92 | for line in p.stdout: 93 | print(line, end="") # process line here 94 | output += line 95 | 96 | if p.returncode != 0: 97 | print("\nCommand did not complete successfully") 98 | exit(p.returncode) 99 | 100 | end_time = time.time() 101 | elapsed_time = end_time - start_time 102 | time_str = f"\nTotal execution time: {time.strftime('%H:%M:%S', time.gmtime(elapsed_time))}" 103 | output += time_str 104 | print(time_str) 105 | print("\nWriting file '{fpath}'...", end="") 106 | 107 | with open(fpath, "w") as f: 108 | f.write(output) 109 | 110 | print(" Done!") 111 | 112 | 113 | if __name__ == "__main__": 114 | main() 115 | -------------------------------------------------------------------------------- /benches/results/v0.2.2-27-g87c32f5_2022-12-25_2135.bench: -------------------------------------------------------------------------------- 1 | v0.2.2-27-g87c32f5_2022-12-25_2135.bench 2 | 3 | Benchmark from 2022-12-25_2135 on commit v0.2.2-27-g87c32f5 4 | rustc 1.68.0-nightly (b569c9dc5 2022-12-21) 5 | 6 | CPU Information: 7 | Intel(R) Core(TM) i5-5257U CPU @ 2.70GHz 8 | 9 | Running: 'cargo bench --bench datastructure' 10 | 11 | 12 | Vec contains true time: [569.22 µs 583.36 µs 600.88 µs] 13 | change: [+13.517% +17.843% +22.890%] (p = 0.00 < 0.05) 14 | Performance has regressed. 15 | Found 2 outliers among 100 measurements (2.00%) 16 | 2 (2.00%) high severe 17 | 18 | Vec contains false time: [1.7272 ms 1.7367 ms 1.7486 ms] 19 | change: [-2.3433% -0.7517% +0.5590%] (p = 0.35 > 0.05) 20 | No change in performance detected. 21 | Found 6 outliers among 100 measurements (6.00%) 22 | 2 (2.00%) high mild 23 | 4 (4.00%) high severe 24 | 25 | BTree contains true time: [2.0813 µs 2.0988 µs 2.1284 µs] 26 | change: [-33.216% -18.618% -5.3123%] (p = 0.02 < 0.05) 27 | Performance has improved. 
28 | Found 5 outliers among 100 measurements (5.00%) 29 | 5 (5.00%) high severe 30 | 31 | BTree contains false time: [2.4196 µs 2.4301 µs 2.4494 µs] 32 | change: [-2.6558% -2.0096% -1.1657%] (p = 0.00 < 0.05) 33 | Performance has improved. 34 | Found 12 outliers among 100 measurements (12.00%) 35 | 2 (2.00%) high mild 36 | 10 (10.00%) high severe 37 | 38 | HashSet contains true time: [478.58 ns 479.76 ns 481.22 ns] 39 | change: [-1.7564% -1.4741% -1.2080%] (p = 0.00 < 0.05) 40 | Performance has improved. 41 | Found 12 outliers among 100 measurements (12.00%) 42 | 3 (3.00%) high mild 43 | 9 (9.00%) high severe 44 | 45 | HashSet contains false time: [393.25 ns 434.25 ns 490.96 ns] 46 | change: [-0.3539% +3.7872% +10.037%] (p = 0.19 > 0.05) 47 | No change in performance detected. 48 | Found 18 outliers among 100 measurements (18.00%) 49 | 2 (2.00%) high mild 50 | 16 (16.00%) high severe 51 | 52 | HashBrownSet contains true 53 | time: [329.93 ns 358.98 ns 394.22 ns] 54 | change: [+11.832% +15.944% +20.455%] (p = 0.00 < 0.05) 55 | Performance has regressed. 56 | Found 4 outliers among 100 measurements (4.00%) 57 | 1 (1.00%) high mild 58 | 3 (3.00%) high severe 59 | 60 | HashBrownSet contains false 61 | time: [209.08 ns 233.62 ns 266.77 ns] 62 | change: [+59.011% +105.51% +168.79%] (p = 0.00 < 0.05) 63 | Performance has regressed. 64 | Found 16 outliers among 100 measurements (16.00%) 65 | 8 (8.00%) high mild 66 | 8 (8.00%) high severe 67 | 68 | VecMap contains true time: [1.3709 ms 1.5246 ms 1.6924 ms] 69 | change: [+63.260% +80.742% +100.35%] (p = 0.00 < 0.05) 70 | Performance has regressed. 71 | Found 7 outliers among 100 measurements (7.00%) 72 | 6 (6.00%) high mild 73 | 1 (1.00%) high severe 74 | 75 | VecMap contains false time: [3.7792 ms 4.1292 ms 4.5332 ms] 76 | change: [+46.003% +60.872% +75.969%] (p = 0.00 < 0.05) 77 | Performance has regressed. 
78 | Found 4 outliers among 100 measurements (4.00%) 79 | 2 (2.00%) high mild 80 | 2 (2.00%) high severe 81 | 82 | VecMap get true time: [834.35 µs 884.64 µs 937.83 µs] 83 | change: [+15.380% +22.165% +29.051%] (p = 0.00 < 0.05) 84 | Performance has regressed. 85 | Found 7 outliers among 100 measurements (7.00%) 86 | 5 (5.00%) high mild 87 | 2 (2.00%) high severe 88 | 89 | VecMap get false time: [4.9732 ms 5.5763 ms 6.2481 ms] 90 | change: [+91.748% +117.22% +146.27%] (p = 0.00 < 0.05) 91 | Performance has regressed. 92 | Found 4 outliers among 100 measurements (4.00%) 93 | 3 (3.00%) high mild 94 | 1 (1.00%) high severe 95 | 96 | BTreeMap contains true time: [2.1304 µs 2.1587 µs 2.1911 µs] 97 | change: [+3.8901% +6.2983% +9.0957%] (p = 0.00 < 0.05) 98 | Performance has regressed. 99 | Found 7 outliers among 100 measurements (7.00%) 100 | 5 (5.00%) high mild 101 | 2 (2.00%) high severe 102 | 103 | BTreeMap contains false time: [3.3337 µs 4.1256 µs 4.9965 µs] 104 | change: [+18.633% +34.493% +54.500%] (p = 0.00 < 0.05) 105 | Performance has regressed. 106 | Found 17 outliers among 100 measurements (17.00%) 107 | 2 (2.00%) high mild 108 | 15 (15.00%) high severe 109 | 110 | BTreeMap get true time: [2.2010 µs 2.2381 µs 2.2866 µs] 111 | change: [-13.097% -7.2918% -1.8966%] (p = 0.01 < 0.05) 112 | Performance has improved. 113 | Found 4 outliers among 100 measurements (4.00%) 114 | 3 (3.00%) high mild 115 | 1 (1.00%) high severe 116 | 117 | BTreeMap get false time: [2.6190 µs 2.6996 µs 2.7880 µs] 118 | change: [-31.970% -14.791% +1.5950%] (p = 0.19 > 0.05) 119 | No change in performance detected. 120 | Found 5 outliers among 100 measurements (5.00%) 121 | 5 (5.00%) high mild 122 | 123 | HashMap contains true time: [521.98 ns 532.74 ns 544.49 ns] 124 | change: [-4.4057% +1.0033% +6.5691%] (p = 0.73 > 0.05) 125 | No change in performance detected. 
126 | Found 6 outliers among 100 measurements (6.00%) 127 | 3 (3.00%) high mild 128 | 3 (3.00%) high severe 129 | 130 | HashMap contains false time: [439.96 ns 506.63 ns 592.46 ns] 131 | change: [-14.677% -3.6708% +7.9234%] (p = 0.56 > 0.05) 132 | No change in performance detected. 133 | Found 13 outliers among 100 measurements (13.00%) 134 | 5 (5.00%) high mild 135 | 8 (8.00%) high severe 136 | 137 | HashMap get true time: [515.70 ns 522.12 ns 529.77 ns] 138 | change: [+8.3012% +13.486% +19.008%] (p = 0.00 < 0.05) 139 | Performance has regressed. 140 | Found 14 outliers among 100 measurements (14.00%) 141 | 6 (6.00%) high mild 142 | 8 (8.00%) high severe 143 | 144 | HashMap get false time: [418.13 ns 441.50 ns 478.80 ns] 145 | change: [-33.490% -21.735% -9.8900%] (p = 0.00 < 0.05) 146 | Performance has improved. 147 | Found 12 outliers among 100 measurements (12.00%) 148 | 7 (7.00%) high mild 149 | 5 (5.00%) high severe 150 | 151 | HashBrownMap contains true 152 | time: [287.48 ns 288.11 ns 288.81 ns] 153 | change: [-27.185% -21.547% -15.816%] (p = 0.00 < 0.05) 154 | Performance has improved. 155 | Found 17 outliers among 100 measurements (17.00%) 156 | 4 (4.00%) high mild 157 | 13 (13.00%) high severe 158 | 159 | HashBrownMap contains false 160 | time: [190.02 ns 233.14 ns 291.77 ns] 161 | change: [+15.667% +28.449% +44.790%] (p = 0.00 < 0.05) 162 | Performance has regressed. 
163 | Found 8 outliers among 100 measurements (8.00%) 164 | 3 (3.00%) high mild 165 | 5 (5.00%) high severe 166 | 167 | HashBrownMap get true time: [302.78 ns 336.35 ns 388.80 ns] 168 | Found 8 outliers among 100 measurements (8.00%) 169 | 5 (5.00%) high mild 170 | 3 (3.00%) high severe 171 | 172 | HashBrownMap get false time: [172.41 ns 175.16 ns 178.23 ns] 173 | Found 1 outliers among 100 measurements (1.00%) 174 | 1 (1.00%) high mild 175 | -------------------------------------------------------------------------------- /benches/results/v0.3.3-3-g1e7d5fd_2023-01-02_0118.bench: -------------------------------------------------------------------------------- 1 | v0.3.3-3-g1e7d5fd_2023-01-02_0118.bench 2 | 3 | Benchmark from 2023-01-02_0118 on commit v0.3.3-3-g1e7d5fd 4 | rustc 1.68.0-nightly (77429957a 2023-01-01) 5 | 6 | CPU Information: 7 | Intel(R) Core(TM) i5-5257U CPU @ 2.70GHz 8 | 9 | Running: 'cargo bench --features benchmarking --bench dict_integration' 10 | 11 | 12 | Parse affix file time: [1.7233 ms 1.7281 ms 1.7355 ms] 13 | change: [-0.7695% +0.2918% +1.6412%] (p = 0.67 > 0.05) 14 | No change in performance detected. 15 | Found 15 outliers among 100 measurements (15.00%) 16 | 3 (3.00%) high mild 17 | 12 (12.00%) high severe 18 | 19 | Parse dict file time: [68.950 ms 69.097 ms 69.264 ms] 20 | change: [-1.8725% -1.4052% -0.9341%] (p = 0.00 < 0.05) 21 | Change within noise threshold. 22 | Found 4 outliers among 100 measurements (4.00%) 23 | 2 (2.00%) high mild 24 | 2 (2.00%) high severe 25 | 26 | Spellcheck: compile dictionary 27 | time: [289.23 ms 293.48 ms 300.70 ms] 28 | change: [+1.3808% +2.8849% +5.6673%] (p = 0.00 < 0.05) 29 | Performance has regressed. 30 | Found 5 outliers among 100 measurements (5.00%) 31 | 2 (2.00%) high mild 32 | 3 (3.00%) high severe 33 | 34 | Spellcheck: 1 correct word 35 | time: [177.69 ns 177.83 ns 178.01 ns] 36 | change: [-5.0761% -2.1543% -0.1512%] (p = 0.09 > 0.05) 37 | No change in performance detected. 
38 | Found 17 outliers among 100 measurements (17.00%) 39 | 6 (6.00%) high mild 40 | 11 (11.00%) high severe 41 | 42 | Spellcheck: 1 incorrect word 43 | time: [207.22 ns 207.87 ns 208.60 ns] 44 | change: [+2.3077% +2.8911% +3.6198%] (p = 0.00 < 0.05) 45 | Performance has regressed. 46 | Found 7 outliers among 100 measurements (7.00%) 47 | 4 (4.00%) high mild 48 | 3 (3.00%) high severe 49 | 50 | Spellcheck: 15 correct words 51 | time: [5.7807 µs 5.7842 µs 5.7888 µs] 52 | change: [+0.1442% +0.9362% +1.8589%] (p = 0.02 < 0.05) 53 | Change within noise threshold. 54 | Found 14 outliers among 100 measurements (14.00%) 55 | 4 (4.00%) high mild 56 | 10 (10.00%) high severe 57 | 58 | Spellcheck: 15 incorrect words 59 | time: [6.6361 µs 6.6398 µs 6.6443 µs] 60 | change: [-1.1998% -0.6706% -0.0092%] (p = 0.02 < 0.05) 61 | Change within noise threshold. 62 | Found 15 outliers among 100 measurements (15.00%) 63 | 4 (4.00%) high mild 64 | 11 (11.00%) high severe 65 | 66 | Spellcheck: 188 word paragraph 67 | time: [1.0548 µs 1.0564 µs 1.0585 µs] 68 | change: [+0.5199% +1.3356% +2.0162%] (p = 0.00 < 0.05) 69 | Change within noise threshold. 
70 | Found 13 outliers among 100 measurements (13.00%) 71 | 5 (5.00%) high mild 72 | 8 (8.00%) high severe 73 | 74 | 75 | Total execution time: 00:04:21 76 | -------------------------------------------------------------------------------- /book.toml: -------------------------------------------------------------------------------- 1 | [book] 2 | authors = ["Trevor Gross"] 3 | language = "en" 4 | multilingual = false 5 | src = "book_src" 6 | title = "The ZSpell Book" 7 | -------------------------------------------------------------------------------- /book_src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | - [Introduction](./introduction.md) 4 | - [Installation](./installation.md) 5 | - [Usage](./usage.md) 6 | -------------------------------------------------------------------------------- /book_src/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Installing a prebuilt binary 4 | 5 | The easiest way to get started is to download a prebuilt binary for your system. 6 | Binaries are avilable for for Windows, Linux, and Mac on the x86_64 platform. 7 | These do not require anything else to be installed. 8 | 9 | Head to and download the latest 10 | binary for your system. Simply extract the download and run the executable. 11 | 12 | If you would like the tool to be accessible from anywhere on your system, you 13 | will need to copy or link this executable to a location that is in your system 14 | path. 15 | 16 | ## Installing via Cargo 17 | 18 | If you already have rust installed and would like to install zspell via Cargo, 19 | this is fairly straightforward: 20 | 21 | ```sh 22 | cargo install zspell 23 | ``` 24 | 25 | ## Building from source 26 | 27 | If you would like to build the latest version (potentially unreleased) from 28 | source without installing (e.g. 
for development purposes), that can be done as 29 | follows: 30 | 31 | ```sh 32 | git clone https://github.com/pluots/zspell 33 | cd zspell 34 | cargo build --release 35 | ``` 36 | -------------------------------------------------------------------------------- /book_src/introduction.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | For the time being, this book is still very much under construction. However, 4 | there is still some good information to be found in the available sections, so 5 | please feel free to take a look. 6 | -------------------------------------------------------------------------------- /book_src/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | This program can be run with the following: 4 | 5 | ```sh 6 | zspell -d dictionaries/en_US 7 | ``` 8 | -------------------------------------------------------------------------------- /clippy.toml: -------------------------------------------------------------------------------- 1 | # for performance, we always want to use HashBrown 2 | disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"] 3 | doc-valid-idents = ["ZSpell"] 4 | -------------------------------------------------------------------------------- /dictionaries/README.md: -------------------------------------------------------------------------------- 1 | These dictionary files come from Titus Wormer's repository here: 2 | [https://github.com/wooorm/dictionaries](https://github.com/wooorm/dictionaries) 3 | 4 | They have been automatically collected. 5 | 6 | These dictionaries are licensed under various licenses, different from that of 7 | this project. Please see the applicable `.license` file withing this directory. 
8 | -------------------------------------------------------------------------------- /release.toml: -------------------------------------------------------------------------------- 1 | allow-branch = ["main"] 2 | shared-version = true 3 | # Single commit for all crates since we are in one repo 4 | consolidate-commits = true 5 | tag-name = "v{{version}}" 6 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | imports_granularity = "Module" 2 | newline_style = "Unix" 3 | group_imports = "StdExternalCrate" 4 | format_code_in_doc_comments = true 5 | format_macro_bodies = true 6 | format_macro_matchers = true 7 | -------------------------------------------------------------------------------- /test-suite: -------------------------------------------------------------------------------- 1 | zspell/test-suite/ -------------------------------------------------------------------------------- /update_dictionaries.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """This script downloads files from the github `wooorm/dictionaries` repository. 4 | 5 | Be sure to obey licensing. 
6 | """ 7 | 8 | import argparse 9 | import base64 10 | import json 11 | import os 12 | import urllib.request 13 | from dataclasses import dataclass 14 | from typing import Any 15 | 16 | # Path to directory with all dictionaries 17 | ROOT_GH_URL = "https://api.github.com/repos/wooorm/dictionaries/contents/dictionaries" 18 | 19 | 20 | @dataclass 21 | class AuthInfo: 22 | """Login information""" 23 | 24 | username: str 25 | token: str 26 | 27 | 28 | @dataclass 29 | class LangDict: 30 | """Represent the URLs for a specific language""" 31 | 32 | name: str 33 | dir_url: str 34 | dict_url: str = None 35 | affix_url: str = None 36 | license_url: str = None 37 | 38 | def set_urls(self, auth: AuthInfo | None): 39 | """Set dict, affix, and license URLs from the name and dir URL""" 40 | listing: list[dict[str, Any]] = get_url_data_json(self.dir_url, auth) 41 | self.dict_url = next( 42 | d["download_url"] for d in listing if d["name"].endswith(".dic") 43 | ) 44 | self.affix_url = next( 45 | d["download_url"] for d in listing if d["name"].endswith(".aff") 46 | ) 47 | self.license_url = next( 48 | d["download_url"] for d in listing if d["name"].lower() == "license" 49 | ) 50 | 51 | def download(self, path: str, auth: AuthInfo | None) -> None: 52 | """Download the files to a designated path""" 53 | print(f"Downloading files for language '{self.name}'") 54 | 55 | dict_path = f"{path}/{self.dict_fname}" 56 | affix_path = f"{path}/{self.affix_fname}" 57 | license_path = f"{path}/{self.license_fname}" 58 | all_paths = (dict_path, affix_path, license_path) 59 | 60 | for fname in all_paths: 61 | if os.path.exists(fname): 62 | print(f"Language '{self.name}' already exists, found '{fname}'") 63 | print("Skipping") 64 | return 65 | 66 | download_file(self.dict_url, f"{path}/{self.dict_fname}.tmp", auth) 67 | download_file(self.affix_url, f"{path}/{self.affix_fname}.tmp", auth) 68 | download_file(self.license_url, f"{path}/{self.license_fname}.tmp", auth) 69 | 70 | # If all goes well, there 
will be no problems. If one failed, program would abort 71 | # Now remove the old ones, if present 72 | for fname in all_paths: 73 | if os.path.exists(fname): 74 | os.remove(fname) 75 | 76 | # And replace with the new 77 | os.rename(f"{fname}.tmp", f"{fname}") 78 | 79 | print(f"Finished downloading files for '{self.name}'") 80 | 81 | @property 82 | def dict_fname(self): 83 | return f"{self.name}.dic" 84 | 85 | @property 86 | def affix_fname(self): 87 | return f"{self.name}.aff" 88 | 89 | @property 90 | def license_fname(self): 91 | return f"{self.name}.license" 92 | 93 | 94 | def make_req(url: str, auth: AuthInfo | None) -> str | urllib.request.Request: 95 | """Make a request with auth information""" 96 | if auth is None: 97 | return url 98 | 99 | auth_str = base64.b64encode(bytes(f"{auth.username}:{auth.token}", "utf8")) 100 | req = urllib.request.Request(url) 101 | req.add_header("Authorization", f"Basic {auth_str}") 102 | return req 103 | 104 | 105 | def get_url_data_json(url: str, auth: AuthInfo | None): 106 | return json.loads(urllib.request.urlopen(make_req(url, auth)).read()) 107 | 108 | 109 | def download_file(url: str, path: str, auth: AuthInfo | None): 110 | return urllib.request.urlretrieve(make_req(url, auth), path) 111 | 112 | 113 | def parse_args(): 114 | parser = argparse.ArgumentParser( 115 | prog="Dictionary downloader", 116 | description="Download dictionaries for development", 117 | ) 118 | parser.add_argument( 119 | "languages", nargs="+", help="Specify language codes to download" 120 | ) 121 | parser.add_argument("--username", help="specify a github username") 122 | parser.add_argument("--token", help="specify a github token") 123 | parser.add_argument( 124 | "--output-dir", help="specify the output directory", default="dictionaries" 125 | ) 126 | args = parser.parse_args() 127 | return args 128 | 129 | 130 | def make_lang_dicts(languages: list[str], auth: AuthInfo) -> list[LangDict]: 131 | print("Gathering listing") 132 | 133 | listing_data: 
list[dict] = get_url_data_json(ROOT_GH_URL, auth) 134 | 135 | lang_dicts: list[LangDict] = [] 136 | 137 | for lang in languages: 138 | lang_name = lang.replace("_", "-") 139 | listing = next( 140 | (listing for listing in listing_data if listing.get("name") == lang_name), 141 | None, 142 | ) 143 | if listing is None: 144 | print(f"Unable to find language {lang}") 145 | exit(1) 146 | lang_dicts.append(LangDict(listing["name"], listing["url"])) 147 | 148 | return lang_dicts 149 | 150 | 151 | def main(): 152 | print(__doc__) 153 | args = parse_args() 154 | username = args.username or os.environ.get("GH_USERNAME") 155 | token = args.token or os.environ.get("GH_TOKEN") 156 | 157 | if username is None or token is None: 158 | print("Not using authentication, large requests may fail") 159 | auth = None 160 | else: 161 | print("Using token authentication") 162 | auth = AuthInfo(username, token) 163 | 164 | print(username, token) 165 | lang_dicts = make_lang_dicts(args.languages, auth) 166 | 167 | for ldict in lang_dicts: 168 | ldict.set_urls(auth) 169 | ldict.download(args.output_dir, auth) 170 | 171 | 172 | if __name__ == "__main__": 173 | main() 174 | -------------------------------------------------------------------------------- /zspell-cli/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zspell-cli" 3 | version = "0.5.5" 4 | edition = "2021" 5 | publish = true 6 | description = "Command line interface for the ZSpell spellchecking library" 7 | rust-version = "1.70" 8 | 9 | 10 | # Note: this is symlinked from top level 11 | readme = "README.md" 12 | license-file = "LICENSE" 13 | 14 | documentation = "https://docs.rs/zspell" 15 | repository = "https://github.com/pluots/zspell" 16 | 17 | keywords = ["spellcheck", "spelling", "cli"] 18 | categories = ["algorithms", "text-processing", "command-line-utilities"] 19 | 20 | # Required to make name "zspell" instead of "zspell-cli" 21 | [[bin]] 22 | name = 
"zspell" 23 | path = "src/main.rs" 24 | 25 | [dependencies] 26 | cfg-if = "1.0" 27 | clap = { version = "4.4.18", features = ["derive", "wrap_help"] } 28 | hex = "0.4" 29 | indicatif = "0.17" 30 | serde = { version = "1.0.203", features = ["derive"] } 31 | serde_json = "1.0.117" 32 | sha1 = "0.10.6" 33 | stringmetrics = "2.2" 34 | termcolor = "1.4.1" 35 | anyhow = "1.0.86" 36 | zspell = { path = "../zspell", version = "0.5.5", features = ["zspell-unstable"] } 37 | ureq = { version = "2.9.7", features = ["json"] } 38 | zspell-index = "0.5.0" 39 | 40 | [dev-dependencies] 41 | criterion = "0.5" 42 | assert_cmd = "2.0" 43 | predicates = "3.1" 44 | tempfile = "3.10" 45 | httpmock = "0.7" 46 | # util = { path = "util" } 47 | 48 | [build-dependencies] 49 | clap = { version = "4.4", features = ["derive", "wrap_help"] } 50 | clap_mangen = "0.2" 51 | clap_complete = "4.4" 52 | 53 | [package.metadata.release] 54 | shared-version = true 55 | allow-branch = ["main", "release"] 56 | 57 | [[package.metadata.release.pre-release-replacements]] 58 | file = "Cargo.toml" 59 | # Need \d match so we don't accidentally match our pattern here 60 | search = 'zspell = \{ path = "../zspell", version = "[\d\.]*", features = \["zspell-unstable"\] \}' 61 | replace = 'zspell = { path = "../zspell", version = "{{version}}", features = ["zspell-unstable"] }' 62 | -------------------------------------------------------------------------------- /zspell-cli/LICENSE: -------------------------------------------------------------------------------- 1 | ../LICENSE -------------------------------------------------------------------------------- /zspell-cli/README.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /zspell-cli/build.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | use std::fs::File; 3 | use 
std::io::Error; 4 | // Need to rename PathBuf because of the `include!` macro 5 | use std::path::{self, Path}; 6 | 7 | use clap::{Command, CommandFactory}; 8 | use clap_complete::generate_to; 9 | use clap_complete::shells::Shell; 10 | 11 | include!("src/cli/mod.rs"); 12 | 13 | fn build_shell_completion(cmd: &mut Command, outdir: &path::PathBuf) -> Result<(), Error> { 14 | // Generate shell completion scripts for our 15 | for shell in [ 16 | Shell::Bash, 17 | Shell::Elvish, 18 | Shell::Fish, 19 | Shell::PowerShell, 20 | Shell::Zsh, 21 | ] { 22 | let path = generate_to( 23 | shell, cmd, // We need to specify what generator to use 24 | "zspell", // We need to specify the bin name manually 25 | outdir, // We need to specify where to write 26 | )?; 27 | 28 | println!("cargo:warning=completion file written to {path:?}"); 29 | } 30 | 31 | Ok(()) 32 | } 33 | 34 | fn build_man_pages(cmd: Command, outdir: &Path) -> Result<(), Error> { 35 | // Generate man pages 36 | let man = clap_mangen::Man::new(cmd); 37 | let mut buffer: Vec = Default::default(); 38 | 39 | man.render(&mut buffer)?; 40 | 41 | let manpage_out = outdir.join("zspell.1"); 42 | 43 | println!("cargo:warning=manpage written to {manpage_out:?}"); 44 | 45 | std::fs::write(manpage_out, buffer)?; 46 | 47 | Ok(()) 48 | } 49 | 50 | fn main() -> Result<(), Error> { 51 | // Output directory will be a cargo-generated random directory 52 | let outdir = match env::var_os("OUT_DIR") { 53 | Some(outdir) => std::path::PathBuf::from(outdir), 54 | None => return Ok(()), 55 | }; 56 | 57 | let profile = std::env::var("PROFILE").unwrap(); 58 | 59 | // Don't generate outputs if we're in debug mode 60 | match profile.as_str() { 61 | "debug" => (), 62 | _ => { 63 | // Create a dummy file to help find the latest output 64 | let stamp_path = Path::new(&outdir).join("zspell-stamp"); 65 | if let Err(err) = File::create(&stamp_path) { 66 | panic!("failed to write {}: {}", stamp_path.display(), err); 67 | } 68 | 69 | let mut cmd = 
Cli::command(); 70 | 71 | build_shell_completion(&mut cmd, &outdir)?; 72 | build_man_pages(cmd, &outdir)?; 73 | } 74 | } 75 | 76 | Ok(()) 77 | } 78 | -------------------------------------------------------------------------------- /zspell-cli/src/cli/mod.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use clap::{Parser, Subcommand}; 4 | 5 | #[derive(Parser, Debug)] 6 | #[command(version, about, long_about = None)] 7 | pub struct Cli { 8 | /// If specified, run spellchecking on a file 9 | pub file: Option, 10 | 11 | /// Path to a dictionary file. Specify e.g. dictionaries/de_DE if 12 | /// dictionaries/de_DE.aff and dictionaries/de_DE.dic exist 13 | #[arg(short = 'd', long)] 14 | pub dict_path: Option, 15 | 16 | /// Whether to print misspelled words 17 | #[arg(short = 'l', long, default_value_t = false)] 18 | pub misspelled_words: bool, 19 | 20 | /// Whether to print lines with misspelled words 21 | #[arg(short = 'L', long, default_value_t = false)] 22 | pub misspelled_lines: bool, 23 | 24 | /// Print the a compiled dictionary's word list to stdout and exit 25 | #[arg(long, default_value_t = false)] 26 | pub generate_wordlist: bool, 27 | 28 | /// Enable morpological analysis mode 29 | #[arg(short = 'm', long, default_value_t = false)] 30 | pub analyze: bool, 31 | 32 | /// Enable word stemming mode 33 | #[arg(short = 's', long, default_value_t = false)] 34 | pub stem: bool, 35 | 36 | /// Print the search path and found dictionaries 37 | #[arg(short = 'D', long, default_value_t = false)] 38 | pub show_dictionaries: bool, 39 | 40 | /// Add a text or personal dictionary 41 | #[arg(short = 't', long, default_value_t = false)] 42 | pub text_dictionary: bool, 43 | 44 | #[command(subcommand)] 45 | pub command: Option, 46 | } 47 | 48 | impl Cli { 49 | pub fn validate(&self) -> Result<(), String> { 50 | if self.analyze && self.stem { 51 | Err("cannot use analysis and stemming together".into()) 52 | } 
else { 53 | Ok(()) 54 | } 55 | } 56 | } 57 | 58 | #[derive(Subcommand, Debug)] 59 | pub enum Commands { 60 | /// Calculate levenshtein distance 61 | Lev { 62 | /// The start string to calculate distance from 63 | string_a: String, 64 | 65 | /// The end string to calculate distance to 66 | string_b: String, 67 | 68 | /// Specify a maximum difference limit for the levenshthein distance 69 | #[arg(short, long, default_value_t = 1000)] 70 | limit: u32, 71 | }, 72 | } 73 | -------------------------------------------------------------------------------- /zspell-cli/src/main.rs: -------------------------------------------------------------------------------- 1 | #![forbid(unsafe_code)] 2 | 3 | use std::process::ExitCode; 4 | 5 | use clap::Parser; 6 | 7 | mod cli; 8 | mod download; 9 | mod spelling; 10 | 11 | use spelling::spellcheck_cli; 12 | use stringmetrics::levenshtein_limit; 13 | 14 | fn main() -> ExitCode { 15 | let cli_parse = cli::Cli::parse(); 16 | if let Err(e) = cli_parse.validate() { 17 | eprintln!("{e}"); 18 | return ExitCode::FAILURE; 19 | } 20 | 21 | if let Some(cli::Commands::Lev { 22 | string_a, 23 | string_b, 24 | limit, 25 | }) = &cli_parse.command 26 | { 27 | println!("{}", levenshtein_limit(string_a, string_b, *limit)); 28 | return ExitCode::SUCCESS; 29 | } 30 | 31 | spellcheck_cli(&cli_parse); 32 | 33 | ExitCode::SUCCESS 34 | } 35 | -------------------------------------------------------------------------------- /zspell-cli/src/spelling/mod.rs: -------------------------------------------------------------------------------- 1 | //! 
Helpers for CLI spelling features 2 | 3 | use std::io::{self, BufRead, Write}; 4 | use std::process::ExitCode; 5 | use std::time::{Instant, SystemTime, UNIX_EPOCH}; 6 | 7 | use zspell::error::Error; 8 | use zspell::system::{create_dict_from_path, PKG_NAME, PKG_VERSION}; 9 | use zspell::Dictionary; 10 | 11 | use crate::cli::Cli; 12 | 13 | // A reminder that code is written by humans 14 | const SALUTATIONS: [&str; 9] = [ 15 | "goodbye", 16 | "auf Wiedersehen", 17 | "adios", 18 | "au revoir", 19 | "arrivederci", 20 | "annyeong", 21 | "sayōnara", 22 | "see you later calculator", 23 | "abyssinia", 24 | ]; 25 | 26 | pub fn spellcheck_cli(cli: &Cli) -> ExitCode { 27 | eprint!("{PKG_NAME} {PKG_VERSION} loading dictionaries... "); 28 | 29 | io::stdout().flush().unwrap(); 30 | 31 | let dict_path = if let Some(v) = cli.dict_path.as_ref() { 32 | v.as_str() 33 | } else { 34 | eprintln!("Dictionary path not specified. Please specify with `-d /path/to/dic`."); 35 | return ExitCode::FAILURE; 36 | }; 37 | 38 | let load_start = Instant::now(); 39 | let dict = match create_dict_from_path(dict_path) { 40 | Ok(v) => v, 41 | Err(e) => { 42 | match e { 43 | Error::Io(e) => eprintln!("IO error: {e}"), 44 | Error::Parse(e) => eprintln!("Error parsing: {e}"), 45 | Error::Build(e) => eprintln!("Error building: {e}"), 46 | Error::Regex(e) => eprintln!("Regex error: {e}"), 47 | _ => unreachable!(), 48 | }; 49 | return ExitCode::FAILURE; 50 | } 51 | }; 52 | let load_time = load_start.elapsed().as_secs_f32(); 53 | let wc = dict.wordlist().inner().len() + dict.wordlist_nosuggest().inner().len(); 54 | eprintln!("loaded {wc} words in {load_time:.2}s. 
started session"); 55 | 56 | if cli.generate_wordlist { 57 | todo!(); 58 | // for item in dic.iter_wordlist_items().unwrap() { 59 | // println!("{item}"); 60 | // } 61 | } else if cli.analyze { 62 | runner_morph_analysis(&dict); 63 | } else if cli.stem { 64 | runner_stemming(&dict); 65 | } else { 66 | runner_spellcheck(&dict); 67 | } 68 | 69 | // Quick RNG without external crates 70 | let bye = SALUTATIONS[SystemTime::now() 71 | .duration_since(UNIX_EPOCH) 72 | .unwrap() 73 | .as_micros() as usize 74 | % SALUTATIONS.len()]; 75 | 76 | eprintln!("\n\nsession ended, {bye}"); 77 | 78 | ExitCode::SUCCESS 79 | } 80 | 81 | fn runner_spellcheck(dict: &Dictionary) { 82 | stdin_interactive_runner(|line| { 83 | for (_, misspelled) in dict.check_indices(&line) { 84 | println!("{misspelled}"); 85 | } 86 | }) 87 | } 88 | 89 | /// From hunspell: 90 | /// 91 | /// ```text 92 | /// banana foo drinkable dofjjos 93 | /// banana banana 94 | /// 95 | /// foo foo 96 | /// 97 | /// drinkable drinkable 98 | /// drinkable drink 99 | /// 100 | /// dofjjos 101 | /// ``` 102 | fn runner_stemming(dict: &Dictionary) { 103 | stdin_interactive_runner(|line| { 104 | for entry in dict.entries(&line) { 105 | if let Some(stems) = entry.stems() { 106 | for stem in stems { 107 | println!("{} {stem}", entry.word()); 108 | } 109 | } else { 110 | println!("{}", entry.word()) 111 | } 112 | } 113 | println!(); 114 | }) 115 | } 116 | 117 | fn runner_morph_analysis(dict: &Dictionary) { 118 | stdin_interactive_runner(|line| { 119 | for entry in dict.entries(&line) { 120 | print!("{}", entry.word()); 121 | 122 | if let Some(morphs) = entry.analyze() { 123 | for morph in morphs { 124 | print!(" {morph}"); 125 | } 126 | } else { 127 | println!("{}", entry.word()) 128 | } 129 | } 130 | println!("\n"); 131 | }) 132 | } 133 | 134 | /// Common runner interface that calls a function once per stdin line 135 | // FIXME: if not a tty, lock output once before writing 136 | fn stdin_interactive_runner(f: F) 137 | where 138 
| F: Fn(String), 139 | { 140 | let stdin = io::stdin(); 141 | // This is a false positive, see clippy #9135 142 | // #[allow(clippy::significant_drop_in_scrutinee)] 143 | for line in stdin.lock().lines() { 144 | let line_val = line.expect("received invalid input from IO!"); 145 | f(line_val); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /zspell-cli/tests/cli_dict.rs: -------------------------------------------------------------------------------- 1 | //! Tests for the dictionary command line interface 2 | -------------------------------------------------------------------------------- /zspell-cli/tests/cli_lev.rs: -------------------------------------------------------------------------------- 1 | //! Tests for the levenshtein command line interface 2 | 3 | use std::process::Command; // Run programs 4 | 5 | use assert_cmd::prelude::*; // Add methods on commands 6 | use predicates::prelude::*; // Used for writing assertions 7 | 8 | #[test] 9 | fn lev_basic() -> Result<(), Box> { 10 | let mut cmd = Command::cargo_bin("zspell")?; 11 | 12 | cmd.arg("lev") 13 | .arg("the quick brown fox") 14 | .arg("the slow brown flocks"); 15 | cmd.assert().success().stdout(predicate::str::contains("9")); 16 | 17 | Ok(()) 18 | } 19 | -------------------------------------------------------------------------------- /zspell-cli/tests/files/de_res.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pluots/zspell/497fb8fa7c6a98d879e7541be942efa54242f595/zspell-cli/tests/files/de_res.txt -------------------------------------------------------------------------------- /zspell-cli/tests/files/sample-index.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema_version": 1, 3 | "updated": "2024-01-18T09:49:04Z", 4 | "items": [ 5 | { 6 | "lang": "de-AT", 7 | "tags": [ 8 | "source-wooorm" 9 | ], 10 | "is_ext": false, 11 | 
"id": "018d1bf7-22c1-7618-b42e-80592e77bc8a", 12 | "fmt": "hunspell", 13 | "aff": { 14 | "urls": [ 15 | "{{ROOT_URL}}/dictionaries/de-AT/index.aff" 16 | ], 17 | "hash": "sha1:a464def0d8bb136f20012d431b60faae2cc915b5", 18 | "size": 19199 19 | }, 20 | "dic": { 21 | "urls": [ 22 | "{{ROOT_URL}}/dictionaries/de-AT/index.dic" 23 | ], 24 | "hash": "sha1:eee2f5c4eddac4175d67c00bc808032b02058b5d", 25 | "size": 1121822 26 | }, 27 | "lic": { 28 | "urls": [ 29 | "{{ROOT_URL}}/dictionaries/de-AT/license" 30 | ], 31 | "hash": "sha1:c4d083267263c478591c4856981f32f31690456d", 32 | "size": 760 33 | } 34 | } 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /zspell-py/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zspell-py" 3 | version = "0.5.5" 4 | edition = "2021" 5 | publish = false 6 | 7 | readme = "README.md" 8 | # Note: this is symlinked from top level 9 | license-file = "LICENSE" 10 | 11 | [lib] 12 | name = "zspell" 13 | crate-type = ["cdylib"] 14 | doc = false 15 | 16 | [dependencies] 17 | regex = "1.10" 18 | pyo3 = { version = "0.21.2", features = ["extension-module"] } 19 | zspell = { path = "../zspell" } 20 | 21 | [build-dependencies] 22 | pyo3-build-config = "0.21.2" 23 | 24 | [package.metadata.release] 25 | shared-version = true 26 | -------------------------------------------------------------------------------- /zspell-py/LICENSE: -------------------------------------------------------------------------------- 1 | ../LICENSE -------------------------------------------------------------------------------- /zspell-py/README.md: -------------------------------------------------------------------------------- 1 | ../README-py.md -------------------------------------------------------------------------------- /zspell-py/build.rs: -------------------------------------------------------------------------------- 1 | // Special build script is needed to 
link to python C source on mac 2 | 3 | fn main() { 4 | pyo3_build_config::add_extension_module_link_args(); 5 | } 6 | -------------------------------------------------------------------------------- /zspell-py/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /zspell-py/docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # sys.path.insert(0, os.path.abspath('.')) 14 | 15 | import re 16 | from pathlib import Path 17 | 18 | import m2r 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = "zspell" 23 | copyright = "2023, Trevor Gross" 24 | author = "Trevor Gross" 25 | 26 | # The full version, including alpha/beta/rc tags 27 | path = Path(__file__).parent.parent.joinpath("Cargo.toml") 28 | with path.open() as fs: 29 | fstr = fs.read() 30 | 31 | # Single source of truth for the version 32 | release = re.search(r"^version\s*=\s*\"(.*)\"$", fstr, re.MULTILINE).groups()[0] 33 | 34 | 35 | # -- General configuration --------------------------------------------------- 36 | 37 | # Add any Sphinx extension module names here, as strings. They can be 38 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 39 | # ones. 40 | extensions = [ 41 | "sphinx.ext.duration", 42 | "sphinx.ext.doctest", 43 | "sphinx.ext.autodoc", 44 | "sphinx.ext.autosummary", 45 | "sphinx.ext.intersphinx", 46 | ] 47 | 48 | 49 | # Add any paths that contain templates here, relative to this directory. 50 | templates_path = ["_templates"] 51 | 52 | # List of patterns, relative to source directory, that match files and 53 | # directories to ignore when looking for source files. 54 | # This pattern also affects html_static_path and html_extra_path. 55 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 56 | 57 | github_url = "https://github.com/pluots/zspell/" 58 | 59 | # Autodoc options 60 | autodoc_member_order = "bysource" 61 | autoclass_content = "both" 62 | 63 | 64 | # -- Options for HTML output ------------------------------------------------- 65 | 66 | # The theme to use for HTML and HTML Help pages. See the documentation for 67 | # a list of builtin themes. 68 | # 69 | html_theme = "furo" 70 | 71 | # Add any paths that contain custom static files (such as style sheets) here, 72 | # relative to this directory. 
They are copied after the builtin static files, 73 | # so a file named "default.css" will overwrite the builtin "default.css". 74 | html_static_path = ["_static"] 75 | 76 | 77 | def convert_docstrings(app, what, name, obj, options, lines): 78 | """Convert docstrings from markdown to RST""" 79 | md = "\n".join(lines) 80 | rst = m2r.convert(md) 81 | lines.clear() 82 | lines += rst.splitlines() 83 | 84 | 85 | def setup(app): 86 | app.connect("autodoc-process-docstring", convert_docstrings) 87 | -------------------------------------------------------------------------------- /zspell-py/docs/index.rst: -------------------------------------------------------------------------------- 1 | .. zspell documentation master file, created by 2 | sphinx-quickstart on Thu Jun 30 00:40:42 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | ZSpell Documentation: Python Interface 7 | ========================================= 8 | 9 | Welcome to the documentation for ZSpell's Python interface. This contains the 10 | basics, please visit the repository at https://github.com/pluots/zspell for 11 | more information. 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | :caption: Contents: 16 | 17 | .. module:: zspell 18 | 19 | .. autoclass:: Dictionary 20 | :members: 21 | :undoc-members: 22 | .. :special-members: 23 | 24 | .. autoclass:: BuildError 25 | :members: 26 | 27 | .. autoclass:: ParseError 28 | :members: 29 | 30 | .. autoclass:: RegexError 31 | :members: 32 | 33 | .. autoclass:: IoError 34 | :members: 35 | 36 | 37 | .. Indices and tables 38 | .. ================== 39 | 40 | .. * :ref:`genindex` 41 | .. * :ref:`modindex` 42 | .. 
* :ref:`search` 43 | -------------------------------------------------------------------------------- /zspell-py/docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.https://www.sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /zspell-py/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | furo>=2022.12 2 | sphinx>=6.0 3 | pygments>=2.13 4 | m2r>=0.3 5 | maturin>=0.14 6 | -------------------------------------------------------------------------------- /zspell-py/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["maturin>=0.14,<0.15"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "zspell" 7 | requires-python = ">=3.7" 8 | description = "A simple yet fast spellchecker that works with Hunspell dictionaries" 9 | classifiers = [ 10 | "Programming Language :: Rust", 11 | "Programming Language :: Python :: Implementation :: CPython", 12 | "Programming Language :: 
Python :: Implementation :: PyPy", 13 | ] 14 | 15 | [tool.pytest.ini_options] 16 | minversion = "6.0" 17 | testpaths = ["tests"] 18 | -------------------------------------------------------------------------------- /zspell-py/python/zspell/__init__.py: -------------------------------------------------------------------------------- 1 | from .zspell import * 2 | 3 | __doc__ = zspell.__doc__ 4 | if hasattr(zspell, "__all__"): 5 | __all__ = zspell.__all__ 6 | -------------------------------------------------------------------------------- /zspell-py/python/zspell/py.types: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pluots/zspell/497fb8fa7c6a98d879e7541be942efa54242f595/zspell-py/python/zspell/py.types -------------------------------------------------------------------------------- /zspell-py/python/zspell/zspell.pyi: -------------------------------------------------------------------------------- 1 | class Dictionary: 2 | def __new__( 3 | config_str: str, dict_str: str, personal_str: str | None 4 | ) -> Dictionary: ... 5 | def check(self, input: str) -> bool: ... 6 | def check_word(self, word: str) -> bool: ... 7 | 8 | class BuildError: ... 9 | class IoError: ... 10 | class ParseError: ... 11 | class RegexError: ... 12 | -------------------------------------------------------------------------------- /zspell-py/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Wrappers around the `zspell` module to expose it to Python 2 | #![forbid(unsafe_code)] 3 | 4 | use ::zspell as z; 5 | use pyo3::create_exception; 6 | use pyo3::exceptions::PyException; 7 | use pyo3::prelude::*; 8 | 9 | #[pyclass] 10 | #[derive(Debug)] 11 | /// This is the main dictionary interface. 12 | /// 13 | /// To use it, you need to load in both an affix configuration file and a 14 | /// dictionary file. 
Sometimes these are installed on your system but if not, 15 | /// this repository has them available: 16 | /// . 17 | /// 18 | /// ```pycon 19 | /// >>> from zspell import Dictionary 20 | /// >>> with open ("dictionaries/en_US.aff", "r") as f: 21 | /// ... config_str = f.read() 22 | /// ... 23 | /// >>> with open ("dictionaries/en_US.dic", "r") as f: 24 | /// ... dict_str = f.read() 25 | /// ... 26 | /// >>> d = Dictionary(config_str, dict_str) 27 | /// >>> d.check("Apples are good! Don't you think?") 28 | /// True 29 | /// >>> d.check("Apples are baaaad") 30 | /// False 31 | /// ``` 32 | struct Dictionary(z::Dictionary); 33 | 34 | #[pymethods] 35 | impl Dictionary { 36 | /// Create a new dictionary 37 | #[new] 38 | #[pyo3(text_signature = "(config_str, dict_str)")] 39 | fn new(config_str: &str, dict_str: &str, personal_str: Option<&str>) -> PyResult { 40 | let mut builder = z::DictBuilder::new() 41 | .dict_str(dict_str) 42 | .config_str(config_str); 43 | 44 | if let Some(personal) = personal_str { 45 | builder = builder.personal_str(personal); 46 | } 47 | 48 | match builder.build() { 49 | Ok(dict) => Ok(Self(dict)), 50 | Err(err) => Err(convert_error(err)), 51 | } 52 | } 53 | 54 | /// Check if a string is valid. 55 | #[pyo3(text_signature = "($self, input)")] 56 | fn check(&self, input: &str) -> bool { 57 | self.0.check(input) 58 | } 59 | 60 | /// Check if a single word is valid. 
61 | #[pyo3(text_signature = "($self, word)")] 62 | fn check_word(&self, word: &str) -> bool { 63 | self.0.check_word(word) 64 | } 65 | // TODO: figure out how to convert to a python iterator 66 | // fn check_indices<'a: 'd, 'd>(&'d self, word: &'a str) -> impl Iterator + 'd{ 67 | // self.0.check_indices(word) 68 | // } 69 | } 70 | 71 | fn convert_error(err: z::Error) -> PyErr { 72 | match err { 73 | z::Error::Parse(e) => ParseError::new_err(format!("{e}")), 74 | z::Error::Build(e) => BuildError::new_err(format!("{e}")), 75 | z::Error::Regex(e) => RegexError::new_err(format!("{e}")), 76 | z::Error::Io(e) => IoError::new_err(format!("{e}")), 77 | _ => unreachable!(), 78 | } 79 | } 80 | 81 | create_exception!( 82 | my_module, 83 | BuildError, 84 | PyException, 85 | "Raised when there is an error building the dictionary." 86 | ); 87 | create_exception!( 88 | my_module, 89 | ParseError, 90 | PyException, 91 | "Raised when there is an error parsing dictionary input." 92 | ); 93 | create_exception!( 94 | my_module, 95 | RegexError, 96 | PyException, 97 | "Raised when there is an error with parsed regex." 98 | ); 99 | create_exception!( 100 | my_module, 101 | IoError, 102 | PyException, 103 | "Raised when there is an I/O error." 104 | ); 105 | 106 | #[pymodule] 107 | fn zspell(py: Python<'_>, m: &PyModule) -> PyResult<()> { 108 | m.add_class::()?; 109 | m.add("BuildError", py.get_type::())?; 110 | m.add("ParseError", py.get_type::())?; 111 | m.add("IoError", py.get_type::())?; 112 | m.add("RegexError", py.get_type::())?; 113 | Ok(()) 114 | } 115 | -------------------------------------------------------------------------------- /zspell-py/tests/test_basic.py: -------------------------------------------------------------------------------- 1 | from zspell import Dictionary 2 | 3 | CFG_STR = """SET UTF-8 4 | 5 | PFX A Y 1 6 | PFX A 0 aa . 
7 | 8 | SFX B Y 2 9 | SFX B y bb y 10 | SFX B 0 cc [^y] 11 | """ 12 | 13 | DICT_STR = """3 14 | xxx/A 15 | yyy/B 16 | zzz/AB 17 | """ 18 | 19 | 20 | def test_simple(): 21 | d = Dictionary(CFG_STR, DICT_STR) 22 | assert d.check("xxx") 23 | assert d.check("aaxxx") 24 | assert d.check("aazzzcc") 25 | -------------------------------------------------------------------------------- /zspell/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "zspell" 3 | version = "0.5.5" 4 | edition = "2021" 5 | authors = ["Trevor Gross "] 6 | description = "Native Rust library for spellchecking" 7 | rust-version = "1.65" 8 | 9 | readme = "README.md" 10 | license-file = "LICENSE" 11 | 12 | documentation = "https://docs.rs/zspell" 13 | repository = "https://github.com/pluots/zspell" 14 | 15 | keywords = ["spellcheck", "spelling", "cli"] 16 | categories = ["algorithms", "text-processing", "command-line-utilities"] 17 | 18 | [badges] 19 | maintenance = { status = "actively-developed" } 20 | 21 | # Config for the rust-usable library and the executable binary 22 | [lib] 23 | name = "zspell" 24 | path = "src/lib.rs" 25 | 26 | 27 | [dependencies] 28 | # Base dependencies 29 | cfg-if = "1.0" 30 | dirs = "5.0.1" 31 | hashbrown = "0.14.5" 32 | itertools = "0.13.0" 33 | lazy_static = "1.4" 34 | regex = "1.10" 35 | stringmetrics = "2.2.2" 36 | sys-locale = "0.3.1" 37 | unicode-segmentation = "1.11.0" 38 | visibility = "0.1.0" 39 | xxhash-rust = { version = "0.8.10", features = ["xxh32"] } 40 | 41 | [dev-dependencies] 42 | criterion = "0.5" 43 | pretty_assertions = "1.4" 44 | tempfile = "3.10" 45 | rand = "0.8.5" 46 | test-util = { path = "test-util" } 47 | indoc = "2.0.5" 48 | 49 | [build-dependencies] 50 | autocfg = "1.3.0" 51 | indoc = "2.0.5" 52 | 53 | [features] 54 | unstable-suggestions = [] 55 | unstable-system = [] 56 | unstable-bench = [] 57 | zspell-unstable = ["unstable-suggestions", "unstable-system"] 58 | 59 | [[bench]] 
60 | name = "datastructure" 61 | harness = false 62 | 63 | [[bench]] 64 | name = "dict_integration" 65 | harness = false 66 | 67 | [[bench]] 68 | name = "slice_contains" 69 | harness = false 70 | 71 | [[bench]] 72 | name = "small_map" 73 | harness = false 74 | 75 | [[bench]] 76 | name = "word_splitter" 77 | harness = false 78 | 79 | [package.metadata.release] 80 | shared-version = true 81 | allow-branch = ["main", "release"] 82 | 83 | # Can't run replacements at workspace root. Need to use this "hacky" sort of way. 84 | [[package.metadata.release.pre-release-replacements]] 85 | file = "../CHANGELOG.md" 86 | search = "Unreleased" 87 | replace = "{{version}}" 88 | 89 | [[package.metadata.release.pre-release-replacements]] 90 | file = "../CHANGELOG.md" 91 | search = "\\.\\.\\.HEAD" 92 | replace = "...{{tag_name}}" 93 | exactly = 1 94 | 95 | [[package.metadata.release.pre-release-replacements]] 96 | file = "../CHANGELOG.md" 97 | search = "ReleaseDate" 98 | replace = "{{date}}" 99 | 100 | [[package.metadata.release.pre-release-replacements]] 101 | file = "../CHANGELOG.md" 102 | search = "" 103 | replace = """\ 104 | \n\n\ 105 | ## [Unreleased] - ReleaseDate\n\n\ 106 | ### Added\n\n\ 107 | ### Changed\n\n\ 108 | ### Removed\n\n\ 109 | """ 110 | exactly = 1 111 | 112 | [[package.metadata.release.pre-release-replacements]] 113 | file = "../CHANGELOG.md" 114 | search = "" 115 | replace = """\ 116 | \n\ 117 | [Unreleased]: https://github.com/pluots/zspell/compare/{{tag_name}}...HEAD\ 118 | """ 119 | exactly = 1 120 | -------------------------------------------------------------------------------- /zspell/LICENSE: -------------------------------------------------------------------------------- 1 | ../LICENSE -------------------------------------------------------------------------------- /zspell/README.md: -------------------------------------------------------------------------------- 1 | ../README.md 
-------------------------------------------------------------------------------- /zspell/benches/datastructure.rs: -------------------------------------------------------------------------------- 1 | //! Benchmarks for operations on datastructures that resemble operations we 2 | //! might use in our spellchecker 3 | 4 | #![allow(clippy::disallowed_types)] 5 | #![allow(clippy::incompatible_msrv)] 6 | 7 | use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; 8 | use std::fs::File; 9 | use std::hint::black_box; 10 | use std::io::{self, BufRead}; 11 | use std::iter::FromIterator; 12 | 13 | use criterion::{criterion_group, criterion_main, Criterion}; 14 | use hashbrown::{HashMap as HashBrownMap, HashSet as HashBrownSet}; 15 | 16 | // We will check all variables in these contains and contains false lists - we 17 | // want a variety of names from throughout the set 18 | const CONTAINS_LIST: [&str; 15] = [ 19 | "Accenture", 20 | "Curie", 21 | "Gujranwala", 22 | "Hesperus", 23 | "Juneau", 24 | "Lakeland", 25 | "Mephistopheles", 26 | "O'Connell", 27 | "Sweden", 28 | "Sarajevo", 29 | "sweptback", 30 | "tigerish", 31 | "Vespucci", 32 | "zymurgy", 33 | "0", 34 | ]; 35 | 36 | const NOT_CONTAINS_LIST: [&str; 15] = [ 37 | "aaaaaa", 38 | "Curied", 39 | "gujranwalda", 40 | "Hesperuds", 41 | "Junaeau", 42 | "Lakaeland", 43 | "Mepsifstopheles", 44 | "OFonnell", 45 | "Swayden", 46 | "Sarajayovo", 47 | "sweptabback", 48 | "tigerstripeish", 49 | "Vespucki", 50 | "zzzzzzz", 51 | "000000", 52 | ]; 53 | 54 | static STR_REF: &str = "SOMETHING"; 55 | 56 | /// Load lines from a file 57 | /// Strip the affix "/" directive 58 | fn lines_loader() -> Vec { 59 | let file = File::open("../dictionaries/en_US.dic").unwrap(); 60 | let lines = io::BufReader::new(file).lines(); 61 | 62 | let mut v: Vec = Vec::new(); 63 | 64 | for line in lines { 65 | v.push(line.unwrap().split('/').next().unwrap().to_owned()); 66 | } 67 | 68 | // Validate items 69 | for item in CONTAINS_LIST { 70 | 
assert!(v.contains(&item.to_string())) 71 | } 72 | for item in NOT_CONTAINS_LIST { 73 | assert!(!v.contains(&item.to_string())) 74 | } 75 | 76 | v 77 | } 78 | 79 | type NestedVecMap = Vec<(T1, Vec)>; 80 | 81 | /// Take the results of `lines_loader` and create a map datatype 82 | /// This replicates the data structure we store with some meta 83 | fn map_loader() -> NestedVecMap { 84 | let lines = lines_loader(); 85 | lines 86 | .iter() 87 | .map(|line| (line.clone(), vec![STR_REF])) 88 | .collect() 89 | } 90 | 91 | // Actual benchmark calling functions 92 | 93 | pub fn bench_vec(c: &mut Criterion) { 94 | let vec: Vec = lines_loader(); 95 | 96 | c.bench_function("Vec contains true", |b| { 97 | b.iter(|| { 98 | for item in CONTAINS_LIST { 99 | black_box(vec.iter().any(|x| x == black_box(item))); 100 | } 101 | }) 102 | }); 103 | 104 | c.bench_function("Vec contains false", |b| { 105 | b.iter(|| { 106 | for item in NOT_CONTAINS_LIST { 107 | black_box(vec.iter().any(|x| x == black_box(item))); 108 | } 109 | }) 110 | }); 111 | } 112 | 113 | pub fn bench_btree(c: &mut Criterion) { 114 | let bt = BTreeSet::from_iter(lines_loader()); 115 | 116 | c.bench_function("BTree contains true", |b| { 117 | b.iter(|| { 118 | for item in CONTAINS_LIST { 119 | black_box(bt.contains(black_box(item))); 120 | } 121 | }) 122 | }); 123 | 124 | c.bench_function("BTree contains false", |b| { 125 | b.iter(|| { 126 | for item in NOT_CONTAINS_LIST { 127 | black_box(bt.contains(black_box(item))); 128 | } 129 | }) 130 | }); 131 | } 132 | 133 | pub fn bench_hashset(c: &mut Criterion) { 134 | let hs: HashSet = HashSet::from_iter(lines_loader()); 135 | 136 | c.bench_function("HashSet contains true", |b| { 137 | b.iter(|| { 138 | for item in CONTAINS_LIST { 139 | black_box(hs.contains(black_box(item))); 140 | } 141 | }) 142 | }); 143 | 144 | c.bench_function("HashSet contains false", |b| { 145 | b.iter(|| { 146 | for item in NOT_CONTAINS_LIST { 147 | black_box(hs.contains(black_box(item))); 148 | } 149 
| }) 150 | }); 151 | } 152 | 153 | pub fn bench_hashbrownset(c: &mut Criterion) { 154 | let hs: HashBrownSet = HashBrownSet::from_iter(lines_loader()); 155 | 156 | c.bench_function("HashBrownSet contains true", |b| { 157 | b.iter(|| { 158 | for item in CONTAINS_LIST { 159 | black_box(hs.contains(black_box(item))); 160 | } 161 | }) 162 | }); 163 | 164 | c.bench_function("HashBrownSet contains false", |b| { 165 | b.iter(|| { 166 | for item in NOT_CONTAINS_LIST { 167 | black_box(hs.contains(black_box(item))); 168 | } 169 | }) 170 | }); 171 | } 172 | 173 | // Map type benchmarks 174 | 175 | pub fn bench_vecmap(c: &mut Criterion) { 176 | let vm: NestedVecMap<_, _> = map_loader(); 177 | 178 | c.bench_function("VecMap contains true", |b| { 179 | b.iter(|| { 180 | for item in CONTAINS_LIST { 181 | black_box(vm.iter().any(|x| x.0 == black_box(item))); 182 | } 183 | }) 184 | }); 185 | 186 | c.bench_function("VecMap contains false", |b| { 187 | b.iter(|| { 188 | for item in NOT_CONTAINS_LIST { 189 | black_box(vm.iter().any(|x| x.0 == black_box(item))); 190 | } 191 | }) 192 | }); 193 | 194 | c.bench_function("VecMap get true", |b| { 195 | b.iter(|| { 196 | for item in CONTAINS_LIST { 197 | black_box(vm.iter().find(|x| x.0 == black_box(item)).map(|x| &x.1)); 198 | } 199 | }) 200 | }); 201 | 202 | c.bench_function("VecMap get false", |b| { 203 | b.iter(|| { 204 | for item in NOT_CONTAINS_LIST { 205 | black_box(vm.iter().find(|x| x.0 == black_box(item)).map(|x| &x.1)); 206 | } 207 | }) 208 | }); 209 | } 210 | 211 | pub fn bench_btreemap(c: &mut Criterion) { 212 | let bt: BTreeMap = BTreeMap::from_iter(map_loader()); 213 | 214 | c.bench_function("BTreeMap contains true", |b| { 215 | b.iter(|| { 216 | for item in CONTAINS_LIST { 217 | black_box(bt.contains_key(black_box(item))); 218 | } 219 | }) 220 | }); 221 | 222 | c.bench_function("BTreeMap contains false", |b| { 223 | b.iter(|| { 224 | for item in NOT_CONTAINS_LIST { 225 | black_box(bt.contains_key(black_box(item))); 226 | } 
227 | }) 228 | }); 229 | 230 | c.bench_function("BTreeMap get true", |b| { 231 | b.iter(|| { 232 | for item in CONTAINS_LIST { 233 | black_box(bt.get(black_box(item))); 234 | } 235 | }) 236 | }); 237 | 238 | c.bench_function("BTreeMap get false", |b| { 239 | b.iter(|| { 240 | for item in NOT_CONTAINS_LIST { 241 | black_box(bt.get(black_box(item))); 242 | } 243 | }) 244 | }); 245 | } 246 | 247 | pub fn bench_hashmap(c: &mut Criterion) { 248 | let hm: HashMap = HashMap::from_iter(map_loader()); 249 | 250 | c.bench_function("HashMap contains true", |b| { 251 | b.iter(|| { 252 | for item in CONTAINS_LIST { 253 | black_box(hm.contains_key(black_box(item))); 254 | } 255 | }) 256 | }); 257 | 258 | c.bench_function("HashMap contains false", |b| { 259 | b.iter(|| { 260 | for item in NOT_CONTAINS_LIST { 261 | black_box(hm.contains_key(black_box(item))); 262 | } 263 | }) 264 | }); 265 | 266 | c.bench_function("HashMap get true", |b| { 267 | b.iter(|| { 268 | for item in CONTAINS_LIST { 269 | black_box(hm.get(black_box(item))); 270 | } 271 | }) 272 | }); 273 | 274 | c.bench_function("HashMap get false", |b| { 275 | b.iter(|| { 276 | for item in NOT_CONTAINS_LIST { 277 | black_box(hm.get(black_box(item))); 278 | } 279 | }) 280 | }); 281 | } 282 | 283 | pub fn bench_hashbrownmap(c: &mut Criterion) { 284 | let hm: HashBrownMap = HashBrownMap::from_iter(map_loader()); 285 | 286 | c.bench_function("HashBrownMap contains true", |b| { 287 | b.iter(|| { 288 | for item in CONTAINS_LIST { 289 | black_box(hm.contains_key(black_box(item))); 290 | } 291 | }) 292 | }); 293 | 294 | c.bench_function("HashBrownMap contains false", |b| { 295 | b.iter(|| { 296 | for item in NOT_CONTAINS_LIST { 297 | black_box(hm.contains_key(black_box(item))); 298 | } 299 | }) 300 | }); 301 | 302 | c.bench_function("HashBrownMap get true", |b| { 303 | b.iter(|| { 304 | for item in CONTAINS_LIST { 305 | black_box(hm.get(black_box(item))); 306 | } 307 | }) 308 | }); 309 | 310 | c.bench_function("HashBrownMap get 
false", |b| { 311 | b.iter(|| { 312 | for item in NOT_CONTAINS_LIST { 313 | black_box(hm.get(black_box(item))); 314 | } 315 | }) 316 | }); 317 | } 318 | 319 | criterion_group!( 320 | datastructure, 321 | bench_vec, 322 | bench_btree, 323 | bench_hashset, 324 | bench_hashbrownset, 325 | bench_vecmap, 326 | bench_btreemap, 327 | bench_hashmap, 328 | bench_hashbrownmap 329 | ); 330 | criterion_main!(datastructure); 331 | -------------------------------------------------------------------------------- /zspell/benches/dict_integration.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::incompatible_msrv)] 2 | 3 | use std::fs; 4 | use std::hint::black_box; 5 | 6 | use criterion::{criterion_group, criterion_main, Criterion}; 7 | use zspell::bench::{affix_from_str, DictEntry, FlagType}; 8 | use zspell::{DictBuilder, Dictionary}; 9 | 10 | const TEXT: &str = "A Hare was mking fun of the Tortoise one day for being so slow. 11 | 12 | Do you ever get anywhere? he asked with a mocking laugh. 13 | 14 | Yes, replied the Tortoise, and I get there sooner than you think. I'll 15 | run you a race and prove it. 16 | 17 | The Hare was much amused at the iea of running a race with the Tortise, 18 | but for the fun of the thing he agreed. So the Fox, who had consented to 19 | act as judge, maarked the distance and started the runners off. 20 | 21 | The Hare was soon far out of sight, and to make the Tortoise feel very 22 | deeply how ridiculous it was for him to try a race with a Hare, he lay 23 | down beside the course to take a nap until the Tortoise should catch up. 24 | 25 | The Tortoise meanwhile kept going sloly but steadily, and, after a time, 26 | passed the place where the Hare was sleeping. But the Hare slept on very 27 | peacefully; and when at last he did wake up, the Tortoise was near the goal. 
28 | The Hare now ran his swiftest, but he could not overtaake the Tortoise 29 | in time."; 30 | 31 | const CONTAINS_LIST: [&str; 15] = [ 32 | "Accenture", 33 | "Curie", 34 | "Gujranwala", 35 | "Hesperus", 36 | "Juneau", 37 | "Lakeland", 38 | "Mephistopheles", 39 | "O'Connell", 40 | "Sweden", 41 | "Sarajevo", 42 | "sweptback", 43 | "tigerish", 44 | "Vespucci", 45 | "zymurgy", 46 | "0", 47 | ]; 48 | 49 | const NOT_CONTAINS_LIST: [&str; 15] = [ 50 | "aaaaaa", 51 | "Curied", 52 | "gujranwalda", 53 | "Hesperuds", 54 | "Junaeau", 55 | "Lakaeland", 56 | "Mepsifstopheles", 57 | "OFonnell", 58 | "Swayden", 59 | "Sarajayovo", 60 | "sweptabback", 61 | "tigerstripeish", 62 | "Vespucki", 63 | "zzzzzzz", 64 | "000000", 65 | ]; 66 | 67 | fn fixture_create_en_dict() -> Dictionary { 68 | // Test that we correctly compile the short wordlist 69 | 70 | let aff_content = fs::read_to_string("../dictionaries/en_US.aff").unwrap(); 71 | let dic_content = fs::read_to_string("../dictionaries/en_US.dic").unwrap(); 72 | 73 | DictBuilder::new() 74 | .dict_str(black_box(&dic_content)) 75 | .config_str(black_box(&aff_content)) 76 | .build() 77 | .unwrap() 78 | } 79 | 80 | pub fn bench_parsers(c: &mut Criterion) { 81 | let aff_content = fs::read_to_string("../dictionaries/en_US.aff").unwrap(); 82 | let dic_content = fs::read_to_string("../dictionaries/en_US.dic").unwrap(); 83 | 84 | c.bench_function("Parse affix file", |b| { 85 | b.iter(|| black_box(affix_from_str(black_box(&aff_content)).unwrap())) 86 | }); 87 | 88 | c.bench_function("Parse dict file", |b| { 89 | b.iter(|| { 90 | black_box( 91 | DictEntry::parse_all(black_box(&dic_content), black_box(FlagType::Utf8)).unwrap(), 92 | ) 93 | }) 94 | }); 95 | } 96 | 97 | /// This test just creates a dictionary. The compiling is the slow step. 
98 | pub fn bench_dict_compile(c: &mut Criterion) { 99 | let aff_content = fs::read_to_string("../dictionaries/en_US.aff").unwrap(); 100 | let dic_content = fs::read_to_string("../dictionaries/en_US.dic").unwrap(); 101 | 102 | c.bench_function("Spellcheck: compile dictionary", |b| { 103 | b.iter(|| { 104 | black_box( 105 | DictBuilder::new() 106 | .dict_str(black_box(&dic_content)) 107 | .config_str(black_box(&aff_content)) 108 | .build() 109 | .unwrap(), 110 | ) 111 | }) 112 | }); 113 | } 114 | 115 | pub fn bench_dict_simple(c: &mut Criterion) { 116 | let dict = fixture_create_en_dict(); 117 | c.bench_function("Spellcheck: 1 correct word", |b| { 118 | b.iter(|| black_box(dict.check_word(black_box("turbidity's")))) 119 | }); 120 | 121 | c.bench_function("Spellcheck: 1 incorrect word", |b| { 122 | b.iter(|| black_box(dict.check_word(black_box("turbiditated")))) 123 | }); 124 | 125 | c.bench_function("Spellcheck: 15 correct words", |b| { 126 | b.iter(|| { 127 | for item in CONTAINS_LIST { 128 | black_box(dict.check(black_box(item))); 129 | } 130 | }) 131 | }); 132 | 133 | c.bench_function("Spellcheck: 15 incorrect words", |b| { 134 | b.iter(|| { 135 | for item in NOT_CONTAINS_LIST { 136 | black_box(dict.check(black_box(item))); 137 | } 138 | }) 139 | }); 140 | } 141 | 142 | pub fn bench_dict_paragraph(c: &mut Criterion) { 143 | let dict = fixture_create_en_dict(); 144 | 145 | c.bench_function("Spellcheck: 188 word paragraph", |b| { 146 | b.iter(|| black_box(dict.check(black_box(TEXT)))) 147 | }); 148 | } 149 | 150 | criterion_group!( 151 | dict_integration, 152 | bench_parsers, 153 | bench_dict_compile, 154 | bench_dict_simple, 155 | bench_dict_paragraph, 156 | // bench_parallel, 157 | ); 158 | criterion_main!(dict_integration); 159 | -------------------------------------------------------------------------------- /zspell/benches/slice_contains.rs: -------------------------------------------------------------------------------- 1 | //! 
Benchmark the difference between contains & `binary_search`es, intended 2 | 3 | #![allow(clippy::incompatible_msrv)] 4 | 5 | use std::hint::black_box; 6 | 7 | use criterion::{criterion_group, criterion_main, Criterion}; 8 | 9 | pub fn benches(c: &mut Criterion) { 10 | const EMPTY: [&str; 0] = []; 11 | const SORT1: [&str; 1] = ["A"]; 12 | const SORT3: [&str; 3] = ["A", "B", "C"]; 13 | const SORT10: [&str; 10] = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]; 14 | const UNSORT3: [&str; 3] = ["C", "A", "B"]; 15 | const UNSORT10: [&str; 10] = ["B", "F", "C", "A", "I", "J", "E", "D", "H", "G"]; 16 | 17 | c.bench_function("Empty: `contains`", |b| { 18 | b.iter(|| black_box(&EMPTY).contains(black_box(&"A"))) 19 | }); 20 | 21 | c.bench_function("Empty: `binary_search`", |b| { 22 | b.iter(|| black_box(&EMPTY).binary_search(black_box(&"A")).is_ok()) 23 | }); 24 | 25 | c.bench_function("1x: `contains`", |b| { 26 | b.iter(|| black_box(&SORT1).contains(black_box(&"A"))) 27 | }); 28 | 29 | c.bench_function("1x: `binary_search`", |b| { 30 | b.iter(|| black_box(&SORT1).binary_search(black_box(&"A")).is_ok()) 31 | }); 32 | 33 | c.bench_function("3 sorted: `contains`", |b| { 34 | b.iter(|| black_box(SORT3).contains(black_box(&"B"))) 35 | }); 36 | 37 | c.bench_function("3 sorted: `binary_search`", |b| { 38 | b.iter(|| black_box(SORT3).binary_search(black_box(&"B")).is_ok()) 39 | }); 40 | 41 | c.bench_function("10 sorted: `contains` early", |b| { 42 | b.iter(|| black_box(SORT10).contains(black_box(&"B"))) 43 | }); 44 | 45 | c.bench_function("10 sorted: `binary_search` early", |b| { 46 | b.iter(|| black_box(SORT10).binary_search(black_box(&"B")).is_ok()) 47 | }); 48 | 49 | c.bench_function("10 sorted: `contains` mid", |b| { 50 | b.iter(|| black_box(SORT10).contains(black_box(&"G"))) 51 | }); 52 | 53 | c.bench_function("10 sorted: `binary_search` mid", |b| { 54 | b.iter(|| black_box(SORT10).binary_search(black_box(&"G")).is_ok()) 55 | }); 56 | 57 | c.bench_function("10 sorted: 
`contains` late", |b| { 58 | b.iter(|| black_box(SORT10).contains(black_box(&"J"))) 59 | }); 60 | 61 | c.bench_function("10 sorted: `binary_search` late", |b| { 62 | b.iter(|| black_box(SORT10).binary_search(black_box(&"J")).is_ok()) 63 | }); 64 | 65 | c.bench_function("3 unsorted: `contains`", |b| { 66 | b.iter(|| black_box(SORT3).contains(black_box(&"B"))) 67 | }); 68 | 69 | c.bench_function("3 unsorted: `binary_search`", |b| { 70 | b.iter(|| { 71 | let mut arr = black_box(UNSORT3); 72 | arr.sort_unstable(); 73 | black_box(arr).binary_search(black_box(&"B")).is_ok() 74 | }) 75 | }); 76 | 77 | c.bench_function("10 unsorted: `contains`", |b| { 78 | b.iter(|| black_box(SORT10).contains(black_box(&"G"))) 79 | }); 80 | 81 | c.bench_function("10 unsorted: `binary_search`", |b| { 82 | b.iter(|| { 83 | let mut arr = black_box(UNSORT10); 84 | arr.sort_unstable(); 85 | black_box(arr).binary_search(black_box(&"G")).is_ok() 86 | }) 87 | }); 88 | } 89 | 90 | criterion_group!(slice_contains, benches,); 91 | criterion_main!(slice_contains); 92 | -------------------------------------------------------------------------------- /zspell/benches/word_splitter.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::incompatible_msrv)] 2 | 3 | use std::hint::black_box; 4 | 5 | use criterion::{criterion_group, criterion_main, Criterion}; 6 | use unicode_segmentation::UnicodeSegmentation; 7 | 8 | const TESTSTR: &str = "the quick brown. Fox Jum-ped -- where? 
'over' (the) very-lazy dog"; 9 | 10 | // These aren't fair comparisons because they don't return indices 11 | pub fn basic_splits(c: &mut Criterion) { 12 | c.bench_function("Split whitespace", |b| { 13 | b.iter(|| black_box(black_box(TESTSTR).split_whitespace().last().unwrap())) 14 | }); 15 | c.bench_function("Split ascii whitespace", |b| { 16 | b.iter(|| black_box(black_box(TESTSTR).split_ascii_whitespace().last().unwrap())) 17 | }); 18 | } 19 | 20 | pub fn segmentation(c: &mut Criterion) { 21 | c.bench_function("Simple segmentation", |b| { 22 | b.iter(|| { 23 | black_box( 24 | black_box(TESTSTR) 25 | .split_word_bound_indices() 26 | .last() 27 | .unwrap(), 28 | ) 29 | }) 30 | }); 31 | c.bench_function("Skip whitespace using all", |b| { 32 | b.iter(|| { 33 | black_box( 34 | black_box(TESTSTR) 35 | .split_word_bound_indices() 36 | .filter(|split| split.1.chars().all(|c| c.is_alphanumeric() || c == '-')) 37 | .last() 38 | .unwrap(), 39 | ) 40 | }) 41 | }); 42 | c.bench_function("Skip whitespace using first", |b| { 43 | b.iter(|| { 44 | black_box( 45 | black_box(TESTSTR) 46 | .split_word_bound_indices() 47 | .filter(|split| { 48 | let first = split.1.chars().next().unwrap(); 49 | first.is_alphanumeric() || first == '-' 50 | }) 51 | .last() 52 | .unwrap(), 53 | ) 54 | }) 55 | }); 56 | c.bench_function("Skip whitespace using first nohyphen", |b| { 57 | b.iter(|| { 58 | black_box( 59 | black_box(TESTSTR) 60 | .split_word_bound_indices() 61 | .filter(|split| split.1.chars().next().unwrap().is_alphanumeric()) 62 | .last() 63 | .unwrap(), 64 | ) 65 | }) 66 | }); 67 | } 68 | 69 | // pub fn segmentation_peek(c: &mut Criterion) { 70 | // c.bench_function("Skip whitespace using first", |b| { 71 | // b.iter(|| { 72 | // black_box( 73 | // black_box(TESTSTR) 74 | // .split_word_bound_indices() 75 | // .filter(|split| { 76 | // let first = split.1.chars().next().unwrap(); 77 | // first.is_alphanumeric() || first == '-' 78 | // }) 79 | // .last() 80 | // .unwrap(), 81 | // ) 82 | 
// }) 83 | // }); 84 | // } 85 | 86 | criterion_group!(word_splitter, basic_splits, segmentation); 87 | criterion_main!(word_splitter); 88 | -------------------------------------------------------------------------------- /zspell/build.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Write; 2 | use std::path::Path; 3 | use std::{env, fs}; 4 | 5 | use indoc::indoc; 6 | 7 | fn main() { 8 | update_tests(); 9 | emit_autocfg(); 10 | } 11 | 12 | const TEST_PREFIX: &str = "// autogenerated file, do not edit manually 13 | // one test is generated for each `.test` file 14 | 15 | "; 16 | 17 | /// Autogenerate an integration test for every `.test` file 18 | fn update_tests() { 19 | let root = Path::new(env!("CARGO_MANIFEST_DIR")); 20 | let out_path = Path::new(&env::var("OUT_DIR").unwrap()).join("auto_suite.rs"); 21 | let suite_dir = root.join("test-suite"); 22 | let test_paths = fs::read_dir(suite_dir).unwrap(); 23 | 24 | let mut to_write = TEST_PREFIX.to_owned(); 25 | let mut all_test_names = Vec::new(); 26 | 27 | for path in test_paths { 28 | let path = path.unwrap().path(); 29 | // let path_str = path.display(); 30 | let fname = path.file_name().unwrap().to_string_lossy(); 31 | let test_name = fname 32 | .strip_suffix(".test") 33 | .unwrap() 34 | .trim_start_matches(char::is_numeric) 35 | .trim_start_matches(['_', '-']) 36 | .replace('-', "_"); 37 | 38 | if all_test_names.contains(&test_name) { 39 | panic!("duplicate generated test name {test_name}"); 40 | } 41 | 42 | if test_name == "example" { 43 | continue; 44 | } 45 | 46 | write!( 47 | to_write, 48 | indoc! 
{" 49 | 50 | #[test] 51 | fn test_{test_name}() {{ 52 | let path = std::path::Path::new(env!(\"CARGO_MANIFEST_DIR\")); 53 | let path = path.join(\"test-suite/{fname}\"); 54 | let mgr = test_util::TestManager::new_from_file(path); 55 | let dict = mgr.build_dict(); 56 | mgr.check_all(&dict); 57 | }} 58 | "}, 59 | test_name = test_name, 60 | fname = fname, 61 | ) 62 | .unwrap(); 63 | 64 | all_test_names.push(test_name); 65 | } 66 | 67 | fs::write(out_path, to_write).unwrap(); 68 | } 69 | 70 | /// Add configuration that depends on rust version 71 | fn emit_autocfg() { 72 | const PROBE_BOX: &str = r#" || { 73 | let s = "foo".to_owned(); 74 | let _b: Box<[String]> = [s].as_slice().into(); 75 | } 76 | "#; 77 | 78 | let ac = autocfg::new(); 79 | 80 | // check if we have `Box<[T]>: From<&[T: Clone]>` loosened from `T: Copy` (1.71) 81 | ac.emit_expression_cfg(PROBE_BOX, "box_from_slice_has_clone_bound"); 82 | println!("cargo:rustc-check-cfg=cfg(box_from_slice_has_clone_bound)"); 83 | } 84 | -------------------------------------------------------------------------------- /zspell/src/affix/node.rs: -------------------------------------------------------------------------------- 1 | //! 
Parser representations of an affix file 2 | 3 | use super::ParsedRuleGroup; 4 | use crate::affix::{CompoundPattern, CompoundSyllable, Conversion, Encoding, FlagType, Phonetic}; 5 | 6 | /// A single line entry in an affix file 7 | /// 8 | /// We represent all flags as strings, and parse them later 9 | #[non_exhaustive] 10 | #[derive(Debug, PartialEq, Eq)] 11 | pub enum AffixNode { 12 | /* 13 | General ptions 14 | */ 15 | /// `SET` 16 | Encoding(Encoding), 17 | /// `FLAG` 18 | FlagType(FlagType), 19 | /// `COMPLEXPREFIXES` twofold prefix stripping 20 | ComplexPrefixes, 21 | /// `LANG` 22 | Language(String), 23 | /// `IGNORE` 24 | IgnoreChars(Vec), 25 | /// `AF` 26 | AffixAlias(Vec), 27 | /// `AM` 28 | MorphAlias(Vec), 29 | 30 | /* 31 | Suggestion Options 32 | */ 33 | /// `KEY` 34 | NeighborKeys(Vec), 35 | /// `TRY` 36 | TryCharacters(String), 37 | /// `NOSUGGEST` 38 | NoSuggestFlag(String), 39 | /// `MAXCPDSUGS` 40 | CompoundSugMax(u16), 41 | /// `MAXNGRAMSUGS` 42 | NGramSugMax(u16), 43 | /// `MAXDIFF` 44 | NGramDiffMax(u8), 45 | /// `ONLYMAXDIFF` 46 | NGramLimitToDiffMax, 47 | /// `NOSPLITSUGS` 48 | NoSplitSuggestions, 49 | /// `SUGSWITHDOTS` 50 | KeepTermDots, 51 | /// `REP` 52 | Replacement(Vec), 53 | /// `MAP` 54 | Mapping(Vec<(char, char)>), 55 | /// `PHONE` 56 | Phonetic(Vec), 57 | /// `WARN` 58 | WarnRareFlag(String), 59 | /* 60 | Compounding Options 61 | */ 62 | /// `FORBIDWARN` 63 | ForbidWarnWords, 64 | /// `BREAK` 65 | BreakSeparator(Vec), 66 | /// `COMPOUNDRULE` 67 | #[allow(dead_code)] 68 | CompoundRule(Vec), 69 | /// `COMPOUNDMIN` 70 | CompoundMinLen(u16), 71 | /// `COMPOUNDFLAG` 72 | CompoundFlag(String), 73 | /// `COMPOUNDBEGIN` 74 | CompoundBeginFlag(String), 75 | /// `COMPOUNDLAST` 76 | CompoundEndFlag(String), 77 | /// `COMPOUNDMIDDLE` 78 | CompoundMiddleFlag(String), 79 | /// `ONLYINCOMPOUND` 80 | CompoundOnlyFlag(String), 81 | /// `COMPOUNDPERMITFLAG` 82 | CompoundPermitFlag(String), 83 | /// `COMPOUNDFORBIDFLAG` 84 | CompoundForbidFlag(String), 
85 | /// `COMPOUNDMORESUFFIXES` 86 | CompoundMoreSuffixes, 87 | /// `COMPOUNDROOT` 88 | CompoundRootFlag(String), 89 | /// `COMPOUNDWORDMAX` 90 | CompoundWordMax(u16), 91 | /// `CHECKCOMPOUNDDUP` 92 | CompoundForbidDup, 93 | /// `CHECKCOMPOUNDREP` 94 | CompoundForbidRepeat, 95 | /// `CHECKCOMPOUNDCASE` 96 | CompoundCheckCase, 97 | /// `CHECKCOMPOUNDTRIPLE` 98 | CompoundCheckTriple, 99 | /// `SIMPLIFIEDTRIPLE` 100 | CompoundSimplifyTriple, 101 | /// `CHECKCOMPOUNDPATTERN` 102 | CompoundForbidPats(Vec), 103 | /// `FORCEUCASE` 104 | CompoundForceUpFlag(String), 105 | /// `COMPOUNDSYLLABLE` 106 | CompoundSyllable(CompoundSyllable), 107 | /// `SYLLABLENUM` 108 | SyllableNum(String), 109 | 110 | /* 111 | Affix Options 112 | */ 113 | /// `PFX` 114 | Prefix(ParsedRuleGroup), 115 | /// `SFX` 116 | Suffix(ParsedRuleGroup), 117 | 118 | /* 119 | Other options 120 | */ 121 | /// `CIRCUMFIX` 122 | AfxCircumfixFlag(String), 123 | /// `FORBIDDENWORD` 124 | ForbiddenWordFlag(String), 125 | /// `FULLSTRIP` 126 | AfxFullStrip, 127 | /// `KEEPCASE` 128 | AfxKeepCaseFlag(String), 129 | /// `ICONV` 130 | AfxInputConversion(Vec), 131 | /// `OCONV` 132 | AfxOutputConversion(Vec), 133 | /// `LEMMA_PRESENT` this flag is deprecated 134 | AfxLemmaPresentFlag(String), 135 | /// `NEEDAFFIX` 136 | AfxNeededFlag(String), 137 | /// `PSEUDOROOT` this flag is deprecated 138 | AfxPseudoRootFlag(String), 139 | /// `SUBSTANDARD` 140 | AfxSubstandardFlag(String), 141 | /// `WORDCHARS` 142 | AfxWordChars(String), 143 | /// `CHECKSHARPS` 144 | AfxCheckSharps, 145 | /// `#` line 146 | Comment, 147 | /// `NAME` 148 | Name(String), 149 | /// `HOME` 150 | HomePage(String), 151 | /// `VERSION` 152 | Version(String), 153 | } 154 | 155 | impl AffixNode { 156 | pub const fn name_str(&self) -> &'static str { 157 | match self { 158 | AffixNode::Encoding(_) => "SET", 159 | AffixNode::FlagType(_) => "FLAG", 160 | AffixNode::ComplexPrefixes => "COMPLEXPREFIXES", 161 | AffixNode::Language(_) => "LANG", 162 | 
AffixNode::IgnoreChars(_) => "IGNORE", 163 | AffixNode::AffixAlias(_) => "AF", 164 | AffixNode::MorphAlias(_) => "AM", 165 | AffixNode::NeighborKeys(_) => "KEY", 166 | AffixNode::TryCharacters(_) => "TRY", 167 | AffixNode::NoSuggestFlag(_) => "NOSUGGEST", 168 | AffixNode::CompoundSugMax(_) => "MAXCPDSUGS", 169 | AffixNode::NGramSugMax(_) => "MAXNGRAMSUGS", 170 | AffixNode::NGramDiffMax(_) => "MAXDIFF", 171 | AffixNode::NGramLimitToDiffMax => "ONLYMAXDIFF", 172 | AffixNode::NoSplitSuggestions => "NOSPLITSUGS", 173 | AffixNode::KeepTermDots => "SUGSWITHDOTS", 174 | AffixNode::Replacement(_) => "REP", 175 | AffixNode::Mapping(_) => "MAP", 176 | AffixNode::Phonetic(_) => "PHONE", 177 | AffixNode::WarnRareFlag(_) => "WARN", 178 | AffixNode::ForbidWarnWords => "FORBIDWARN", 179 | AffixNode::BreakSeparator(_) => "BREAK", 180 | AffixNode::CompoundRule(_) => "COMPOUNDRULE", 181 | AffixNode::CompoundMinLen(_) => "COMPOUNDMIN", 182 | AffixNode::CompoundFlag(_) => "COMPOUNDFLAG", 183 | AffixNode::CompoundBeginFlag(_) => "COMPOUNDBEGIN", 184 | AffixNode::CompoundEndFlag(_) => "COMPOUNDLAST", 185 | AffixNode::CompoundMiddleFlag(_) => "COMPOUNDMIDDLE", 186 | AffixNode::CompoundOnlyFlag(_) => "ONLYINCOMPOUND", 187 | AffixNode::CompoundPermitFlag(_) => "COMPOUNDPERMITFLAG", 188 | AffixNode::CompoundForbidFlag(_) => "COMPOUNDFORBIDFLAG", 189 | AffixNode::CompoundMoreSuffixes => "COMPOUNDMORESUFFIXES", 190 | AffixNode::CompoundRootFlag(_) => "COMPOUNDROOT", 191 | AffixNode::CompoundWordMax(_) => "COMPOUNDWORDMAX", 192 | AffixNode::CompoundForbidDup => "CHECKCOMPOUNDDUP", 193 | AffixNode::CompoundForbidRepeat => "CHECKCOMPOUNDREP", 194 | AffixNode::CompoundCheckCase => "CHECKCOMPOUNDCASE", 195 | AffixNode::CompoundCheckTriple => "CHECKCOMPOUNDTRIPLE", 196 | AffixNode::CompoundSimplifyTriple => "SIMPLIFIEDTRIPLE", 197 | AffixNode::CompoundForbidPats(_) => "CHECKCOMPOUNDPATTERN", 198 | AffixNode::CompoundForceUpFlag(_) => "FORCEUCASE", 199 | AffixNode::CompoundSyllable(_) => 
"COMPOUNDSYLLABLE", 200 | AffixNode::SyllableNum(_) => "SYLLABLENUM", 201 | AffixNode::Prefix(_) => "PFX", 202 | AffixNode::Suffix(_) => "SFX", 203 | AffixNode::AfxCircumfixFlag(_) => "CIRCUMFIX", 204 | AffixNode::ForbiddenWordFlag(_) => "FORBIDDENWORD", 205 | AffixNode::AfxFullStrip => "FULLSTRIP", 206 | AffixNode::AfxKeepCaseFlag(_) => "KEEPCASE", 207 | AffixNode::AfxInputConversion(_) => "ICONV", 208 | AffixNode::AfxOutputConversion(_) => "OCONV", 209 | AffixNode::AfxLemmaPresentFlag(_) => "LEMMA_PRESENT", 210 | AffixNode::AfxNeededFlag(_) => "NEEDAFFIX", 211 | AffixNode::AfxPseudoRootFlag(_) => "PSEUDOROOT", 212 | AffixNode::AfxSubstandardFlag(_) => "SUBSTANDARD", 213 | AffixNode::AfxWordChars(_) => "WORDCHARS", 214 | AffixNode::AfxCheckSharps => "CHECKSHARPS", 215 | AffixNode::Comment => "#", 216 | AffixNode::Name(_) => "NAME", 217 | AffixNode::HomePage(_) => "HOME", 218 | AffixNode::Version(_) => "VERSION", 219 | } 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /zspell/src/affix/rule.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use super::RuleType; 4 | use crate::error::ParseErrorKind; 5 | use crate::helpers::{compile_re_pattern, ReWrapper}; 6 | use crate::morph::MorphInfo; 7 | use crate::Error; 8 | 9 | /// A simple prefix or suffix rule 10 | /// 11 | /// This struct represents a prefix or suffix option that may be applied to any 12 | /// base word. It contains multiple possible rule definitions that describe how 13 | /// to apply the rule. 
14 | #[derive(Clone, Debug, PartialEq, Eq)] 15 | pub struct ParsedRuleGroup { 16 | /// Character identifier for this specific affix, usually any uppercase 17 | /// letter 18 | pub(crate) flag: String, 19 | /// Prefix or suffix 20 | pub(crate) kind: RuleType, 21 | /// Whether or not this can be combined with the opposite affix 22 | pub(crate) can_combine: bool, 23 | /// Actual rules for replacing 24 | pub(crate) rules: Vec, 25 | } 26 | 27 | #[derive(Clone, Debug, PartialEq, Eq)] 28 | pub struct ParsedRule { 29 | /// Affix to be added 30 | pub(crate) affix: String, 31 | /// Characters to remove from the beginning or end 32 | pub(crate) strip: Option>, 33 | /// Regex-based rule for when this rule is true. `None` indicates `.`, i.e., 34 | /// always true 35 | pub(crate) condition: Option, 36 | /// Morphological information 37 | pub(crate) morph_info: Vec>, 38 | } 39 | 40 | impl ParsedRule { 41 | #[allow(unused)] 42 | pub(crate) fn new( 43 | kind: RuleType, 44 | affix: &str, 45 | strip: Option<&str>, 46 | condition: Option<&str>, 47 | morph_info: Vec>, 48 | ) -> Result { 49 | let cond_re = match condition { 50 | Some(c) => compile_re_pattern(c, kind)?, 51 | None => None, 52 | }; 53 | 54 | Ok(Self { 55 | strip: strip.map(Into::into), 56 | affix: affix.to_owned(), 57 | condition: cond_re, 58 | morph_info, 59 | }) 60 | } 61 | 62 | /// Same as `new` but don't modify the regex string 63 | #[allow(unused)] 64 | pub(crate) fn new_raw_re( 65 | kind: RuleType, 66 | affix: &str, 67 | strip: Option<&str>, 68 | condition: Option<&str>, 69 | morph_info: Vec>, 70 | ) -> Result { 71 | let cond_re = match condition { 72 | Some(c) => Some(ReWrapper::new(c)?), 73 | None => None, 74 | }; 75 | 76 | Ok(Self { 77 | strip: strip.map(Into::into), 78 | affix: affix.to_owned(), 79 | condition: cond_re, 80 | morph_info, 81 | }) 82 | } 83 | 84 | /// Create from the information we have available during parse 85 | pub(crate) fn new_parse( 86 | kind: RuleType, 87 | affix: &str, 88 | strip: &str, 89 | 
condition: &str, 90 | morph_info: Vec>, 91 | ) -> Result { 92 | let cond_re = compile_re_pattern(condition, kind)?; 93 | let strip_chars = if strip == "0" { 94 | None 95 | } else { 96 | Some(strip.into()) 97 | }; 98 | 99 | Ok(Self { 100 | strip: strip_chars, 101 | affix: affix.to_owned(), 102 | condition: cond_re, 103 | morph_info, 104 | }) 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /zspell/src/affix/tests.rs: -------------------------------------------------------------------------------- 1 | //! Affix tests 2 | 3 | use super::*; 4 | 5 | #[test] 6 | fn test_flagtype_convert_ok() { 7 | assert_eq!(FlagType::Ascii.str_to_flag("T"), Ok(Flag(84))); 8 | } 9 | -------------------------------------------------------------------------------- /zspell/src/affix/tests_parse.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | 3 | use pretty_assertions::assert_eq; 4 | use test_util::workspace_root; 5 | 6 | use super::*; 7 | use crate::affix::PartOfSpeech; 8 | use crate::error::Span; 9 | 10 | #[test] 11 | fn test_line_splitter_none() { 12 | let s = "no key here # abcd"; 13 | assert_eq!(line_splitter(s, "KEY"), None); 14 | } 15 | 16 | #[test] 17 | fn test_line_splitter_some() { 18 | let s1 = "KEY key here\nnext line"; 19 | let s2 = "KEY key here# comment"; 20 | let s3 = "KEY key here\rnext line"; 21 | let s4 = "# comment here\n#next line"; 22 | assert_eq!(line_splitter(s1, "KEY"), Some(("key here", "\nnext line"))); 23 | assert_eq!(line_splitter(s2, "KEY"), Some(("key here", "# comment"))); 24 | assert_eq!(line_splitter(s3, "KEY"), Some(("key here", "\rnext line"))); 25 | assert_eq!( 26 | line_splitter(s4, "#"), 27 | Some(("comment here", "\n#next line")) 28 | ); 29 | } 30 | 31 | #[test] 32 | fn test_line_key_parser_none() { 33 | let s = "no key here # abcd"; 34 | assert_eq!( 35 | line_key_parser(s, "KEY", |_| Ok(AffixNode::Comment)), 36 | Ok(None) 37 | ); 38 | } 39 | 
40 | #[test] 41 | fn test_line_key_parser_some() { 42 | let s = "KEY key here\nnext line"; 43 | assert_eq!( 44 | line_key_parser(s, "KEY", |_| Ok(AffixNode::Comment)), 45 | Ok(Some((AffixNode::Comment, "\nnext line", 0))) 46 | ); 47 | } 48 | 49 | #[test] 50 | fn test_line_key_parser_err() { 51 | let s = "KEY key here\nnext line"; 52 | let e = ParseError::new_nospan(ParseErrorKind::Boolean, ""); 53 | assert_eq!(line_key_parser(s, "KEY", |_| Err(e.clone())), Err(e)); 54 | } 55 | 56 | #[test] 57 | fn test_line_key_parser() { 58 | let err = ParseError::new_nospan(ParseErrorKind::Boolean, ""); 59 | let get_lang = |s: &str| { 60 | if s == "apple" { 61 | Ok(AffixNode::Language("apple".to_owned())) 62 | } else { 63 | Err(err.clone()) 64 | } 65 | }; 66 | 67 | let txt1 = "LANG apple"; 68 | let txt2 = "LANG apple\nLANG banana"; 69 | let txt3 = "LANG failure"; 70 | 71 | assert_eq!( 72 | line_key_parser(txt1, "LANG", get_lang), 73 | Ok(Some((AffixNode::Language("apple".to_owned()), "", 0))) 74 | ); 75 | assert_eq!( 76 | line_key_parser(txt2, "LANG", get_lang), 77 | Ok(Some(( 78 | AffixNode::Language("apple".to_owned()), 79 | "\nLANG banana", 80 | 0 81 | ))) 82 | ); 83 | assert_eq!(line_key_parser(txt3, "LANG", get_lang), Err(err)); 84 | } 85 | 86 | #[test] 87 | fn test_parse_neighbor_keys() { 88 | let s = "KEY abc|def|ghi # end"; 89 | let res = parse_neighbor_keys(s); 90 | assert_eq!( 91 | res, 92 | Ok(Some(( 93 | AffixNode::NeighborKeys(vec!["abc".to_owned(), "def".to_owned(), "ghi".to_owned()]), 94 | "# end", 95 | 0 96 | ))) 97 | ); 98 | } 99 | 100 | #[test] 101 | fn test_bool_parser_ok() { 102 | let s = "COMPLEXPREFIXES\nmore stuff"; 103 | let res = parse_complex_prefixes(s); 104 | assert_eq!( 105 | res, 106 | Ok(Some((AffixNode::ComplexPrefixes, "\nmore stuff", 0))) 107 | ); 108 | } 109 | 110 | #[test] 111 | fn test_bool_parser_err() { 112 | let s = "COMPLEXPREFIXES unneeded things\nmore stuff"; 113 | let res = parse_complex_prefixes(s); 114 | assert!(res.is_err()); 115 | } 
116 | 117 | #[test] 118 | fn test_munch_newline_some() { 119 | let s1 = " \nabc"; 120 | let s2 = "\n"; 121 | assert_eq!(munch_newline(s1), Ok(Some("abc"))); 122 | assert_eq!(munch_newline(s2), Ok(Some(""))); 123 | } 124 | 125 | #[test] 126 | fn test_munch_newline_none() { 127 | let s = " "; 128 | assert_eq!(munch_newline(s), Ok(None)); 129 | } 130 | 131 | #[test] 132 | fn test_munch_newline_cmt() { 133 | let s = " # abcd \nresid"; 134 | assert_eq!(munch_newline(s), Ok(Some("resid"))); 135 | } 136 | 137 | #[test] 138 | fn test_munch_newline_err() { 139 | let s = " abcd \nresid"; 140 | assert!(munch_newline(s).is_err()); 141 | } 142 | 143 | #[test] 144 | fn test_table_parser_ok() { 145 | let s = "REP 3\nREP a b\nREP c d\nREP longer val"; 146 | let expected = AffixNode::Replacement(vec![ 147 | Conversion::new("a", "b", false), 148 | Conversion::new("c", "d", false), 149 | Conversion::new("longer", "val", false), 150 | ]); 151 | assert_eq!(parse_replacement(s), Ok(Some((expected, "", 3)))); 152 | } 153 | 154 | #[test] 155 | fn test_afx_table_parser_err() { 156 | // check line offset count 157 | let s = "PFX A N 2\nPFX 10 a b x .\nPFX A 0 c a"; 158 | let res = parse_prefix(s); 159 | assert_eq!(res.unwrap_err().span().unwrap(), &Span::new(1, 0)); 160 | } 161 | 162 | const SAMPLE_AFX_OK: &str = r#" 163 | SET UTF-8 164 | TRY abcd' 165 | # comment 166 | ICONV 2 # comment 167 | ICONV a b # comment 168 | ICONV ' " 169 | NOSUGGEST X 170 | ONLYINCOMPOUND C 171 | WORDCHARS 01234 172 | # comment 173 | PFX A N 2 174 | PFX A 0 ar . po:verb st:foot is:ay other:foo otherfoo po:xyz 175 | PFX A 0 br a 176 | 177 | SFX B Y 2 178 | SFX B 0 ar . 
179 | SFX B 0 br [^a] 180 | 181 | REP 2 182 | REP a b 183 | REP abcd 123 184 | 185 | PHONE 1 186 | PHONE abcd 1234 187 | "#; 188 | 189 | #[test] 190 | fn test_full_parse() { 191 | let expected = vec![ 192 | AffixNode::Encoding(Encoding::Utf8), 193 | AffixNode::TryCharacters("abcd'".into()), 194 | AffixNode::Comment, 195 | AffixNode::AfxInputConversion(vec![ 196 | Conversion::new("a", "b", false), 197 | Conversion::new("'", "\"", false), 198 | ]), 199 | AffixNode::NoSuggestFlag("X".into()), 200 | AffixNode::CompoundOnlyFlag("C".into()), 201 | AffixNode::AfxWordChars("01234".into()), 202 | AffixNode::Comment, 203 | AffixNode::Prefix(ParsedRuleGroup { 204 | flag: "A".to_owned(), 205 | kind: RuleType::Prefix, 206 | can_combine: false, 207 | rules: vec![ 208 | ParsedRule::new_raw_re( 209 | RuleType::Prefix, 210 | "ar", 211 | None, 212 | None, 213 | vec![ 214 | MorphInfo::Part(PartOfSpeech::Verb).into(), 215 | MorphInfo::Stem("foot".into()).into(), 216 | MorphInfo::InflecSfx("ay".into()).into(), 217 | MorphInfo::Other("other:foo".into()).into(), 218 | MorphInfo::Other("otherfoo".into()).into(), 219 | MorphInfo::Part(PartOfSpeech::Other("xyz".into())).into(), 220 | ], 221 | ) 222 | .unwrap(), 223 | ParsedRule::new_raw_re(RuleType::Prefix, "br", None, Some("^a.*$"), Vec::new()) 224 | .unwrap(), 225 | ], 226 | }), 227 | AffixNode::Suffix(ParsedRuleGroup { 228 | flag: "B".to_owned(), 229 | kind: RuleType::Suffix, 230 | can_combine: true, 231 | rules: vec![ 232 | ParsedRule::new_raw_re(RuleType::Suffix, "ar", None, None, Vec::new()).unwrap(), 233 | ParsedRule::new_raw_re(RuleType::Suffix, "br", None, Some("^.*[^a]$"), Vec::new()) 234 | .unwrap(), 235 | ], 236 | }), 237 | AffixNode::Replacement(vec![ 238 | Conversion::new("a", "b", false), 239 | Conversion::new("abcd", "123", false), 240 | ]), 241 | AffixNode::Phonetic(vec![Phonetic::new("abcd", "1234")]), 242 | ]; 243 | 244 | assert_eq!(affix_from_str(SAMPLE_AFX_OK), Ok(expected)); 245 | } 246 | 247 | #[test] 248 | fn 
test_large_file_parse() { 249 | let mut aff_path = workspace_root(); 250 | aff_path.push("dictionaries"); 251 | aff_path.push("en_US.aff"); 252 | 253 | let Ok(aff_content) = fs::read_to_string(aff_path) else { 254 | eprintln!("skipping large test flies; not found"); 255 | return; 256 | }; 257 | 258 | assert!(affix_from_str(&aff_content).is_ok()); 259 | } 260 | -------------------------------------------------------------------------------- /zspell/src/dict/flags.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{self, Display}; 2 | use std::sync::Arc; 3 | 4 | use super::rule::AfxRule; 5 | 6 | /// A flag representation is either an ASCII char, unicode char, or number. We can fit 7 | /// any of those in a u32. 8 | #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] 9 | pub struct Flag(pub u32); 10 | 11 | impl Flag { 12 | pub fn new_ascii(ch: u8) -> Self { 13 | debug_assert!(ch.is_ascii()); 14 | Self(ch.into()) 15 | } 16 | 17 | pub fn new_utf8(ch: char) -> Self { 18 | Self(ch.into()) 19 | } 20 | 21 | /// Must be a 2-character string 22 | pub fn new_long(s: &str) -> Self { 23 | debug_assert!(s.len() == 2, "invalid string length: {s}"); 24 | debug_assert!( 25 | s.chars().all(|ch| ch.is_ascii()), 26 | "invalid string characters: {s}" 27 | ); 28 | 29 | let num = u16::from_le_bytes(s[..=1].as_bytes().try_into().unwrap()); 30 | 31 | Self(num.into()) 32 | } 33 | 34 | pub fn new_number(num: u32) -> Self { 35 | Self(num) 36 | } 37 | } 38 | 39 | impl fmt::Debug for Flag { 40 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 41 | if let Ok(single_flag) = u8::try_from(self.0) { 42 | write!(f, "{}", char::from(single_flag)) 43 | } else if let Ok(long_flag) = u16::try_from(self.0) { 44 | let [a, b] = long_flag.to_le_bytes(); 45 | write!(f, "{}{}", char::from(a), char::from(b)) 46 | } else { 47 | write!(f, "{:#06x}", self.0) 48 | } 49 | } 50 | } 51 | 52 | /// A representation of a flag value 53 | 
#[non_exhaustive] 54 | #[derive(Debug, Clone, Hash, PartialEq, Eq)] 55 | pub enum FlagValue { 56 | // LemmaPresent and PseudoRoot are missing as they are deprecated 57 | AfxCircumfix, 58 | AfxKeepCase, 59 | AfxNeeded, 60 | AfxPseudoRoot, 61 | AfxSubstandard, 62 | Compound, 63 | CompoundBegin, 64 | CompoundEnd, 65 | CompoundForbid, 66 | CompoundForceUp, 67 | CompoundMiddle, 68 | CompoundOnly, 69 | CompoundPermit, 70 | CompoundRoot, 71 | ForbiddenWord, 72 | NoSuggest, 73 | WarnRare, 74 | /// Special case 75 | Rule(Arc), 76 | } 77 | 78 | impl Display for FlagValue { 79 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 80 | match self { 81 | FlagValue::AfxCircumfix => write!(f, "AfxCircumfix"), 82 | FlagValue::AfxKeepCase => write!(f, "AfxKeepCase"), 83 | FlagValue::AfxNeeded => write!(f, "AfxNeeded"), 84 | FlagValue::AfxPseudoRoot => write!(f, "AfxPseudoRoot"), 85 | FlagValue::AfxSubstandard => write!(f, "AfxSubstandard"), 86 | FlagValue::Compound => write!(f, "Compound"), 87 | FlagValue::CompoundBegin => write!(f, "CompoundBegin"), 88 | FlagValue::CompoundEnd => write!(f, "CompoundEnd"), 89 | FlagValue::CompoundForbid => write!(f, "CompoundForbid"), 90 | FlagValue::CompoundForceUp => write!(f, "CompoundForceUp"), 91 | FlagValue::CompoundMiddle => write!(f, "CompoundMiddle"), 92 | FlagValue::CompoundOnly => write!(f, "CompoundOnly"), 93 | FlagValue::CompoundPermit => write!(f, "CompoundPermit"), 94 | FlagValue::CompoundRoot => write!(f, "CompoundRoot"), 95 | FlagValue::ForbiddenWord => write!(f, "ForbiddenWord"), 96 | FlagValue::NoSuggest => write!(f, "NoSuggest"), 97 | FlagValue::WarnRare => write!(f, "WarnRare"), 98 | FlagValue::Rule(_) => write!(f, "Rule"), 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /zspell/src/dict/meta.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Borrow; 2 | use std::sync::Arc; 3 | 4 | use 
super::rule::AfxRule; 5 | use crate::morph::MorphInfo; 6 | 7 | /// Additional information attached to an entry in a dictionary 8 | /// 9 | /// Cheaply cloneable 10 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] 11 | pub struct Meta { 12 | stem: Arc, 13 | source: Source, 14 | } 15 | 16 | impl Meta { 17 | pub(crate) fn new(stem_rc: Arc, source: Source) -> Self { 18 | Self { 19 | stem: stem_rc, 20 | source, 21 | } 22 | } 23 | 24 | /// Return the stem of a word. Prefers the stem from the morph info if it is available 25 | pub fn stem(&self) -> &str { 26 | // If we have a dictionary source, check if we have a stem-type `MorphInfo` 27 | // and return it 28 | if let Source::Dict(morphvec) = &self.source { 29 | if let Some(stem) = morphvec.iter().find_map(|morph| { 30 | if let MorphInfo::Stem(st) = morph.borrow() { 31 | Some(st) 32 | } else { 33 | None 34 | } 35 | }) { 36 | return stem.as_ref(); 37 | } 38 | } 39 | 40 | &self.stem 41 | } 42 | 43 | pub fn source(&self) -> &Source { 44 | &self.source 45 | } 46 | } 47 | 48 | /// Source information 49 | #[allow(clippy::box_collection)] 50 | #[non_exhaustive] 51 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] 52 | pub enum Source { 53 | /// This meta came from an affix and has a full affix rule 54 | Affix { 55 | /// The full rule that created this 56 | rule: Arc, 57 | /// Index of the relevant pattern within the rule. 
This could potentially be a reference 58 | /// but that might require a `RefCell`, and I don't want to risk reference 59 | pat_idx: usize, 60 | }, 61 | /// This meta came from a .dic file, only contains morphinfo 62 | Dict(Arc<[Arc]>), 63 | /// This meta came from the personal dictionary 64 | Personal(Arc), 65 | /// The source is a raw text file with no additional metadata 66 | Raw, 67 | } 68 | 69 | impl Source { 70 | /// Iterate through all morph info available 71 | pub fn morphs(&self) -> impl Iterator { 72 | match self { 73 | Source::Affix { rule, pat_idx } => rule.patterns()[*pat_idx].morph_info(), 74 | Source::Dict(v) => v.as_ref(), 75 | Source::Personal(v) => v.morph.as_ref(), 76 | Source::Raw => &[], 77 | } 78 | .iter() 79 | .map(AsRef::as_ref) 80 | } 81 | 82 | /// Helper to create an `Affix` source when the `Arc` already exists 83 | pub(crate) fn new_affix(rule: &Arc, pat_idx: usize) -> Self { 84 | Self::Affix { 85 | rule: Arc::clone(rule), 86 | pat_idx, 87 | } 88 | } 89 | } 90 | 91 | /// Representation of meta info for a personal dictionary 92 | #[derive(Debug, PartialEq, Eq, Hash)] 93 | pub struct PersonalMeta { 94 | friend: Option>, 95 | morph: Vec>, 96 | } 97 | 98 | impl PersonalMeta { 99 | pub fn new(friend: Option>, morph: Vec>) -> Self { 100 | Self { friend, morph } 101 | } 102 | } 103 | 104 | #[cfg(test)] 105 | #[allow(unused)] 106 | mod tests { 107 | use std::collections::hash_map::DefaultHasher; 108 | use std::hash::{Hash, Hasher}; 109 | 110 | use super::*; 111 | 112 | fn calculate_hash(t: &T) -> u64 { 113 | let mut s = DefaultHasher::new(); 114 | t.hash(&mut s); 115 | s.finish() 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /zspell/src/dict/parse.rs: -------------------------------------------------------------------------------- 1 | //! 
Parse a dict file 2 | 3 | use std::sync::Arc; 4 | 5 | use super::Flag; 6 | use crate::affix::FlagType; 7 | use crate::error::ParseError; 8 | use crate::helpers::convertu32; 9 | use crate::morph::MorphInfo; 10 | 11 | /// Represent a single line in a dictionary file 12 | /// 13 | /// Format is as follows: 14 | /// 15 | /// ```text 16 | /// word[/flags...] [morphinfo ...] 17 | /// band/ESGD po:noun 18 | /// laser/M 19 | /// fruit 20 | /// ``` 21 | /// Flags and morph info are optional 22 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] 23 | pub struct DictEntry { 24 | pub(super) stem: Arc, 25 | pub(super) flags: Vec, 26 | pub(super) morph: Vec>, 27 | } 28 | 29 | impl DictEntry { 30 | /// Test config: create a new `DictEntry` 31 | #[cfg(test)] 32 | pub(crate) fn new(stem: &str, flags: &[Flag], morph: &[MorphInfo]) -> Self { 33 | Self { 34 | stem: stem.into(), 35 | flags: flags.to_owned(), 36 | morph: morph.iter().map(|v| Arc::new(v.clone())).collect(), 37 | } 38 | } 39 | 40 | /// Create a `DictEntry` from a single line in a `.dic` file. Does not strip comments. 41 | fn parse_single(value: &str, flag_type: FlagType, line_num: u32) -> Result { 42 | let (stem, flagstr, morphstr) = separate_into_parts(value); 43 | 44 | let flags: Vec = match flagstr { 45 | Some(s) => flag_type 46 | .parse_str(s.trim()) 47 | .map_err(|e| ParseError::new_nocol(e, s, line_num))?, 48 | None => Vec::new(), 49 | }; 50 | let morph = MorphInfo::many_from_str(morphstr.trim()) 51 | .map(Arc::new) 52 | .collect(); 53 | let ret = Self { 54 | stem: stem.trim().into(), 55 | flags, 56 | morph, 57 | }; 58 | Ok(ret) 59 | } 60 | 61 | /// Parse a complete dictionary file (usually `.dic`) 62 | /// 63 | /// # Errors 64 | /// 65 | /// Returns an error if any entry is incorrect. 
66 | #[inline] 67 | #[allow(clippy::option_if_let_else)] 68 | pub fn parse_all(input: &str, flag_type: FlagType) -> Result, ParseError> { 69 | // Ignore empty lines and 70 | let mut lines_iter = extract_content(input); 71 | let lines_backup = lines_iter.clone(); 72 | 73 | let Some(first) = lines_iter.next() else { 74 | return Ok(Vec::new()); 75 | }; 76 | 77 | // Try to parse the first line as an integer; if not, ignore it 78 | let (mut ret, start) = if let Ok(cap) = first.parse::() { 79 | (Vec::with_capacity(cap), 2) 80 | } else { 81 | lines_iter = lines_backup; 82 | (Vec::new(), 1) 83 | }; 84 | 85 | for (i, line) in lines_iter.enumerate() { 86 | ret.push( 87 | DictEntry::parse_single(line, flag_type, convertu32(i + start)) 88 | .map_err(|e| e.add_offset_ret(i + start, 0))?, 89 | ); 90 | } 91 | Ok(ret) 92 | } 93 | } 94 | 95 | /// Represent an entry from a personal dictionary 96 | /// 97 | /// Format is as follows: 98 | /// 99 | /// ```text 100 | /// [*]word[/friend] [morphinfo ...] 101 | /// enum/apple po:noun 102 | /// someword 103 | /// *ignoreword 104 | /// ``` 105 | /// 106 | /// The hunspell spec doesn't say anything about morph info, but why not allow 107 | /// it 108 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] 109 | pub struct PersonalEntry { 110 | pub stem: Arc, 111 | /// Reference to a main word in the dictionary that this word should inherit 112 | /// its metadata (stemming, affixes, etc) from 113 | pub friend: Option>, 114 | pub morph: Vec, 115 | pub forbid: bool, 116 | } 117 | 118 | impl PersonalEntry { 119 | #[cfg(test)] 120 | pub(crate) fn new( 121 | stem: &str, 122 | friend: Option<&str>, 123 | morph: Vec, 124 | forbid: bool, 125 | ) -> Self { 126 | Self { 127 | stem: stem.into(), 128 | friend: friend.map(Into::into), 129 | morph, 130 | forbid, 131 | } 132 | } 133 | 134 | pub fn parse_single(value: &str) -> Self { 135 | let (stem, friend, morphstr) = separate_into_parts(value); 136 | let forbid = stem.starts_with('*'); 137 | let stem = 
/// Separate `(stem, flagstr, morphstr)` into parts
fn separate_into_parts(value: &str) -> (&str, Option<&str>, &str) {
    // Discard anything after a `#` comment marker.
    let content = match value.split_once('#') {
        Some((before, _)) => before,
        None => value,
    };

    if let Some((word, tail)) = content.split_once('/') {
        // Easy case: a `/` marks the flags. The first whitespace after it
        // separates the flags from the morph info.
        let (flags, morphs) = match tail.split_once(|ch: char| ch.is_ascii_whitespace()) {
            Some(pair) => pair,
            None => (tail, ""),
        };
        (word, Some(flags), morphs)
    } else {
        // Trickier case: a `:` hints at morph info; the last whitespace before
        // it is assumed to end the stem. No colon (or no whitespace before it)
        // means the whole line is the stem.
        let ws_split = content
            .find(':')
            .and_then(|colon| content[..colon].rfind(|ch: char| ch.is_ascii_whitespace()));
        match ws_split {
            Some(ws_idx) => (&content[..ws_idx], None, &content[ws_idx..]),
            None => (content, None, ""),
        }
    }
}

/// Extract nonempty lines that do not contain a comment
fn extract_content(input: &str) -> impl Iterator<Item = &str> + Clone {
    input
        .lines()
        // Dictionary files sometimes use tabs for comments, need to check before trim
        .filter(|line| !line.starts_with('\t'))
        // Trim hash comments
        .map(|line| match line.split_once('#') {
            Some((before, _)) => before,
            None => line,
        })
        .map(str::trim)
        .filter(|line| !line.is_empty())
}
46 | /// Needs construction wherever the Arc target is 47 | // PERF: bench with & without vec reference instead of output 48 | pub fn from_parsed_group(_cfg: &ParsedCfg, group: &ParsedRuleGroup) -> Self { 49 | let mut ret = Self { 50 | kind: group.kind, 51 | can_combine: group.can_combine, 52 | patterns: Vec::with_capacity(group.rules.len()), 53 | }; 54 | 55 | for rule in &group.rules { 56 | let morph_info: Vec> = rule.morph_info.clone(); 57 | 58 | ret.patterns.push(AfxRulePattern { 59 | affix: rule.affix.as_str().into(), 60 | condition: rule.condition.clone(), 61 | // FIXME: `rule.strip.as_ref().map(Arc::clone)` is more accurate, but flagged by 62 | // clippy 63 | strip: rule.strip.clone(), 64 | morph_info, 65 | }); 66 | } 67 | 68 | ret 69 | } 70 | 71 | pub fn is_pfx(&self) -> bool { 72 | self.kind == RuleType::Prefix 73 | } 74 | 75 | pub fn is_sfx(&self) -> bool { 76 | self.kind == RuleType::Prefix 77 | } 78 | 79 | pub fn can_combine(&self) -> bool { 80 | self.can_combine 81 | } 82 | 83 | /// Apply this rules patterns. Returns an iterator over the index of the 84 | /// pattern and the resulting string 85 | pub fn apply_patterns<'a>( 86 | &'a self, 87 | stem: &'a str, 88 | ) -> impl Iterator + 'a { 89 | self.patterns 90 | .iter() 91 | .enumerate() 92 | .filter_map(|(idx, pat)| pat.apply_pattern(stem, self.kind).map(|s| (idx, s))) 93 | } 94 | 95 | pub(crate) fn patterns(&self) -> &[AfxRulePattern] { 96 | &self.patterns 97 | } 98 | } 99 | 100 | /// A single affix rule application 101 | #[derive(Clone, Default, Debug, PartialEq, Eq, Hash)] 102 | pub struct AfxRulePattern { 103 | affix: Box, 104 | /// Condition to be met to apply this rule. 
105 | condition: Option, 106 | /// Characters to strip 107 | strip: Option>, 108 | /// Associated morph info 109 | morph_info: Vec>, 110 | } 111 | 112 | impl AfxRulePattern { 113 | /// New with a specified affix, otherwise default values 114 | #[cfg(test)] 115 | pub fn new(afx: &str, strip: Option<&str>) -> Self { 116 | Self { 117 | affix: afx.into(), 118 | condition: None, 119 | strip: strip.map(Into::into), 120 | morph_info: Vec::new(), 121 | } 122 | } 123 | 124 | /// Helper for testing, sets the condition based on a kind 125 | #[cfg(test)] 126 | pub fn set_pattern(&mut self, condition: &str, kind: RuleType) -> Result<(), regex::Error> { 127 | self.condition = crate::helpers::compile_re_pattern(condition, kind)?; 128 | Ok(()) 129 | } 130 | 131 | /// Check whether a condition is applicable 132 | #[allow(clippy::option_if_let_else)] 133 | pub fn check_condition(&self, s: &str) -> bool { 134 | match &self.condition { 135 | Some(re) => re.is_match(s), 136 | None => true, 137 | } 138 | } 139 | 140 | pub(crate) fn morph_info(&self) -> &[Arc] { 141 | &self.morph_info 142 | } 143 | 144 | // Verify the match condition and apply this rule 145 | #[allow(clippy::option_if_let_else)] 146 | fn apply_pattern(&self, s: &str, kind: RuleType) -> Option { 147 | // No return if condition doesn't match 148 | if !self.check_condition(s) { 149 | return None; 150 | } 151 | 152 | match kind { 153 | RuleType::Prefix => { 154 | // If stripping chars exist, strip them from the prefix 155 | let mut working: String = self.affix.as_ref().into(); 156 | 157 | if let Some(sc) = &self.strip { 158 | working.push_str(s.strip_prefix(sc.as_ref()).unwrap_or(s)); 159 | } else { 160 | working.push_str(s); 161 | } 162 | working.shrink_to_fit(); 163 | Some(working) 164 | } 165 | RuleType::Suffix => { 166 | // Same logic as above 167 | let mut working = if let Some(sc) = &self.strip { 168 | s.strip_suffix(sc.as_ref()).unwrap_or(s).to_owned() 169 | } else { 170 | s.to_owned() 171 | }; 172 | 
//! Take rules and apply them to a word, trying to find a match in an
//! existing wordlist.
//!
//! NOTE(review): every function here is an unimplemented placeholder for a
//! reverse (affix-stripping) lookup strategy.
#![allow(unused)]

use crate::affix::CompoundConfig;
use crate::Dictionary;

/// Try to create a word
///
/// Intended entry point for the reverse-rule check; not yet implemented.
fn entrypoint(dict: &Dictionary, word: &str) -> bool {
    todo!()
}

/// Placeholder: strip candidate prefixes from a word.
fn try_strip_pfx() {
    todo!()
}

/// Placeholder: strip candidate suffixes from a word. Currently a no-op.
fn try_strip_sfx() {}

/// Try splitting the word at each position and testing the parts according to
/// compound rules
fn compound_thing(cfg: &CompoundConfig) {}
Tests for a dict file 2 | 3 | use std::fs; 4 | 5 | use indoc::indoc; 6 | use pretty_assertions::assert_eq; 7 | use test_util::workspace_root; 8 | 9 | use super::*; 10 | 11 | #[test] 12 | fn test_update_personal() { 13 | let personal_str = r" 14 | abcd po:verb 15 | efgh st:something 16 | *ijkl 17 | mnop 18 | qrst 19 | uvwx st:something 20 | *yz12 po:verb 21 | 3456 22 | "; 23 | 24 | let mut d = Dictionary::new(ParsedCfg::default()).unwrap(); 25 | d.parse_update_personal(personal_str, &[]).unwrap(); 26 | assert!(d.wordlist.0.contains_key("abcd")); 27 | assert!(d.wordlist.0.contains_key("efgh")); 28 | assert!(!d.wordlist.0.contains_key("ijkl")); 29 | assert!(d.wordlist_forbidden.0.contains_key("ijkl")); 30 | assert!(d.check("abcd")); 31 | assert!(d.check("uvwx")); 32 | assert!(!d.check("ijkl")); 33 | 34 | let entry = d.entry("efgh"); 35 | let stems: Vec<_> = entry.stems().unwrap().collect(); 36 | assert_eq!(stems, vec!["efgh", "something"]); 37 | } 38 | 39 | #[test] 40 | #[cfg(not(miri))] // slow! 
/// Build a dictionary from the full `en_US` files if they are available.
#[test]
fn test_builder_large_file() {
    let mut aff_path = workspace_root();
    aff_path.push("dictionaries");
    let mut dic_path = aff_path.clone();
    aff_path.push("en_US.aff");
    dic_path.push("en_US.dic");

    // Skip (rather than fail) when the large dictionaries are not checked out.
    // Fixed: "test flies" typo in the skip message.
    let Ok(aff_content) = fs::read_to_string(aff_path) else {
        eprintln!("skipping large test files; not found");
        return;
    };

    let dic_content = fs::read_to_string(dic_path).unwrap();
    let dict = DictBuilder::new()
        .config_str(&aff_content)
        .dict_str(&dic_content)
        .build()
        .unwrap();

    // `assert!` instead of `assert_eq!(_, true/false)` per clippy's
    // `bool_assert_comparison` (this crate enables pedantic lints).
    assert!(dict.check("reptiles pillow bananas"));
    assert!(!dict.check("pine missssspelled"));
}
/// Check `DictEntry::parse_single` against every sample line for each of the
/// three flag types.
#[test]
fn test_dict_entry_ok() {
    let f1 = FlagType::Utf8;
    let f2 = FlagType::Ascii;
    let f3 = FlagType::Long;

    let s_0f0m_1 = "abcd";
    let s_0f0m_2 = "abcd # comment";
    let s_4f0m_1 = "abcd/ABCD";
    let s_4f0m_2 = "abcd/ABCD # comment";
    let s_4f2m_1 = "abcd/ABCD ip:m1 tp:m2";
    let s_4f2m_2 = "abcd/ABCD ip:m1 tp:m2 # comment";
    let s_0f2m_1 = "abcd ip:m1 tp:m2";
    let s_0f2m_2 = "abcd ip:m1 tp:m2 # comment";

    // No flags
    let r_0f0m = DictEntry::new("abcd", &[], &[]);

    // All flags
    let r_4f0m = DictEntry::new(
        "abcd",
        &[
            Flag::new_ascii(b'A'),
            Flag::new_ascii(b'B'),
            Flag::new_ascii(b'C'),
            Flag::new_ascii(b'D'),
        ],
        &[],
    );

    // The same four characters parsed as two long (2-character) flags
    let r_2f0m = DictEntry::new("abcd", &[Flag::new_long("AB"), Flag::new_long("CD")], &[]);

    // All flags plus morph info
    let r_4f2m = DictEntry::new(
        "abcd",
        &[
            Flag::new_ascii(b'A'),
            Flag::new_ascii(b'B'),
            Flag::new_ascii(b'C'),
            Flag::new_ascii(b'D'),
        ],
        &[
            MorphInfo::InflecPfx("m1".into()),
            MorphInfo::TermPfx("m2".into()),
        ],
    );

    let r_2f2m = DictEntry::new(
        "abcd",
        &[Flag::new_long("AB"), Flag::new_long("CD")],
        &[
            MorphInfo::InflecPfx("m1".into()),
            MorphInfo::TermPfx("m2".into()),
        ],
    );

    // No flags, including morph info
    let r_0f2m = DictEntry::new(
        "abcd",
        &[],
        &[
            MorphInfo::InflecPfx("m1".into()),
            MorphInfo::TermPfx("m2".into()),
        ],
    );

    // Fixed: the original assertion list duplicated some lines (`s_4f2m_1` and
    // `s_0f2m_2` twice for ASCII, `s_4f2m_1` twice for long flags) and so never
    // checked `s_4f2m_2` or `s_0f2m_1` for those flag types. Explicit case
    // tables make the coverage visible.
    let common_cases = [
        (s_0f0m_1, &r_0f0m),
        (s_0f0m_2, &r_0f0m),
        (s_0f2m_1, &r_0f2m),
        (s_0f2m_2, &r_0f2m),
    ];
    let char_flag_cases = [
        (s_4f0m_1, &r_4f0m),
        (s_4f0m_2, &r_4f0m),
        (s_4f2m_1, &r_4f2m),
        (s_4f2m_2, &r_4f2m),
    ];
    let long_flag_cases = [
        (s_4f0m_1, &r_2f0m),
        (s_4f0m_2, &r_2f0m),
        (s_4f2m_1, &r_2f2m),
        (s_4f2m_2, &r_2f2m),
    ];

    // UTF-8 and ASCII flag types treat single-character flags identically.
    for flag_type in [f1, f2] {
        for &(input, expected) in common_cases.iter().chain(&char_flag_cases) {
            assert_eq!(
                DictEntry::parse_single(input, flag_type, 0).as_ref(),
                Ok(expected),
                "input: {input:?}"
            );
        }
    }

    // The long flag type groups pairs of ASCII characters.
    for &(input, expected) in common_cases.iter().chain(&long_flag_cases) {
        assert_eq!(
            DictEntry::parse_single(input, f3, 0).as_ref(),
            Ok(expected),
            "input: {input:?}"
        );
    }
}
/// Exercise `apply_pattern` for suffix and prefix rules, with and without a
/// match condition.
#[test]
fn test_apply_pattern() {
    // Suffix rule: strip a trailing "y" and append "zzz".
    let mut rule = AfxRulePattern::new("zzz", Some("y"));
    rule.set_pattern("[^aeiou]y", RuleType::Suffix).unwrap();
    assert_eq!(
        rule.apply_pattern("xxxy", RuleType::Suffix),
        Some("xxxzzz".to_owned())
    );

    // Prefix rule: strip a leading "y" and prepend "zzz".
    rule.set_pattern("y[^aeiou]", RuleType::Prefix).unwrap();
    assert_eq!(
        rule.apply_pattern("yxxx", RuleType::Prefix),
        Some("zzzxxx".to_owned())
    );

    // The universal "." condition applies to everything.
    rule.set_pattern(".", RuleType::Suffix).unwrap();
    assert_eq!(
        rule.apply_pattern("xxx", RuleType::Suffix),
        Some("xxxzzz".to_owned())
    );
}
assert_eq!(group.apply_pattern("coy").unwrap(), "coyness"); 63 | // assert_eq!(group.apply_pattern("acute").unwrap(), "acuteness"); 64 | // } 65 | -------------------------------------------------------------------------------- /zspell/src/helpers.rs: -------------------------------------------------------------------------------- 1 | //! Various functions that are helpful throughout the crate 2 | 3 | use core::fmt::Display; 4 | use std::borrow::Cow; 5 | use std::hash::Hash; 6 | use std::ops::Deref; 7 | use std::rc::Rc; 8 | use std::sync::Arc; 9 | 10 | use hashbrown::Equivalent; 11 | use regex::Regex; 12 | 13 | use crate::affix::RuleType; 14 | 15 | /// Wrap `Regex` objects so they can be hashed 16 | #[derive(Clone, Debug)] 17 | pub struct ReWrapper(Regex); 18 | 19 | impl ReWrapper { 20 | pub fn new(re: &str) -> Result { 21 | Ok(Self(Regex::new(re)?)) 22 | } 23 | } 24 | 25 | impl Eq for ReWrapper {} 26 | 27 | impl PartialEq for ReWrapper { 28 | fn eq(&self, other: &Self) -> bool { 29 | self.0.as_str() == other.0.as_str() 30 | } 31 | } 32 | 33 | impl Hash for ReWrapper { 34 | fn hash(&self, state: &mut H) { 35 | self.0.as_str().hash(state); 36 | } 37 | } 38 | 39 | impl Deref for ReWrapper { 40 | type Target = Regex; 41 | 42 | fn deref(&self) -> &Self::Target { 43 | &self.0 44 | } 45 | } 46 | 47 | /// Convert any integer to a u32, panic if it does not fit 48 | #[inline] 49 | pub fn convertu32 + Display + Copy>(value: T) -> u32 { 50 | value 51 | .try_into() 52 | .unwrap_or_else(|_| panic!("value {value} overflows u32 max of {}", u32::MAX)) 53 | } 54 | 55 | /// Compile a regex pattern in the context of an affix. Returns None if 56 | /// the universal pattern "." is provided 57 | pub fn compile_re_pattern( 58 | condition: &str, 59 | kind: RuleType, 60 | ) -> Result, regex::Error> { 61 | if condition == "." 
/// Replace every `from` with `to` in `s`, allocating only when at least one
/// occurrence exists.
#[allow(unused)]
pub fn replace_cow<'a>(s: &'a str, from: char, to: &str) -> Cow<'a, str> {
    match s.find(from) {
        // At least one occurrence: build the replaced string.
        Some(_) => Cow::Owned(s.replace(from, to)),
        // Untouched input can be borrowed as-is.
        None => Cow::Borrowed(s),
    }
}
You will need to know the location of dictionary files on your system, or 15 | //! obtain them yourself. A repository exists that has dictionaries for many 16 | //! different languages, if you don't have any available: 17 | //! . 18 | //! 19 | //! This library requires specifying the input from these files, then building a 20 | //! [`Dictionary`] object that can be used to perform all other operations. 21 | //! Usage will typically look like the following: 22 | //! 23 | //! ``` 24 | //! # #![cfg(not(miri))] 25 | //! use std::fs; 26 | //! 27 | //! use zspell::Dictionary; 28 | //! 29 | //! // This example just uses some shortened files. Load them to a string 30 | //! let aff_content = 31 | //! fs::read_to_string("tests/files/w1_eng_short.aff").expect("failed to load config file"); 32 | //! let dic_content = 33 | //! fs::read_to_string("tests/files/w1_eng_short.dic").expect("failed to load wordlist file"); 34 | //! 35 | //! // Use the builder pattern to create our `Dictionary` object 36 | //! let dict: Dictionary = zspell::builder() 37 | //! .config_str(&aff_content) 38 | //! .dict_str(&dic_content) 39 | //! .build() 40 | //! .expect("failed to build dictionary!"); 41 | //! 42 | //! // The `.check(&str)` method is useful for quickly verifying entire strings 43 | //! assert_eq!(dict.check("reptiles pillow: bananas"), true); 44 | //! assert_eq!(dict.check("well, I misspelled soemthing this tiem"), false); 45 | //! 46 | //! // Or use `.check_word(&str)` to validate the input as a single word 47 | //! assert_eq!(dict.check_word("okay"), true); 48 | //! assert_eq!(dict.check_word("okay okay"), false); 49 | //! 50 | //! // `.check_indices(&str)` provides more useful information for anything other than trivial 51 | //! // checks. It returns an iterator over `(usize, &str)`, which gives the byte offset and 52 | //! // string reference of any spelling errors. 53 | //! let input = "okay, I misspelled soemthing this tiem"; 54 | //! 
let errors: Vec<(usize, &str)> = dict.check_indices(input).collect(); 55 | //! let expected = vec![(19, "soemthing"), (34, "tiem")]; 56 | //! assert_eq!(errors, expected); 57 | //! ``` 58 | //! 59 | //! There is also a powerful entry-based API that allows for stemming and analysis, as well as 60 | //! suggestions (which are currently unstable). 61 | //! 62 | //! ``` 63 | //! # #![cfg(not(miri))] 64 | //! 65 | //! # use std::fs; 66 | //! # use zspell::Dictionary; 67 | //! # let aff_content = 68 | //! # fs::read_to_string("tests/files/w1_eng_short.aff").expect("failed to load config file"); 69 | //! # let dic_content = 70 | //! # fs::read_to_string("tests/files/w1_eng_short.dic").expect("failed to load wordlist file"); 71 | //! # let dict: Dictionary = zspell::builder() 72 | //! # .config_str(&aff_content) 73 | //! # .dict_str(&dic_content) 74 | //! # .build() 75 | //! # .expect("failed to build dictionary!"); 76 | //! let input = "bananas rusting"; 77 | //! let mut entries = dict.entries(input); 78 | //! 79 | //! // We can use the entry API to do the standard checks (word position and correctness), 80 | //! // but also to find word roots. 81 | //! let banana_entry = entries.next().unwrap(); 82 | //! let banana_stems: Vec<&str> = banana_entry.stems().unwrap().collect(); 83 | //! assert_eq!(banana_entry.word(), "bananas"); 84 | //! assert_eq!(banana_entry.index(), 0); 85 | //! assert_eq!(banana_entry.correct(), true); 86 | //! assert_eq!(banana_stems, ["banana"]); 87 | //! 88 | //! let rust_entry = entries.next().unwrap(); 89 | //! let rust_stems: Vec<&str> = rust_entry.stems().unwrap().collect(); 90 | //! assert_eq!(rust_stems, ["rust"]); 91 | //! ``` 92 | //! 93 | //! See [`Dictionary`] and [`DictBuilder`] to get started. 94 | //! 95 | //! # Stability & Feature Flags 96 | //! 97 | //! At the moment, the only public functions available are `check`, 98 | //! `check_word`, and `check_indices`. These three functions are more or less 99 | //! 
guaranteed to have stable interfaces, though the internals may change. 100 | //! 101 | //! There are also some unstable components to this library: 102 | //! 103 | //! - `unstable-suggestions`: Needed for providing suggestions, this is 104 | //! currently disabled because it is slow. 105 | //! - `unstable-system`: Needed for system interfaces like locating existing 106 | //! dictionaries 107 | //! - `zspell-unstable`: Enable all of these options 108 | //! 109 | //! These flags can be enabled in your `Cargo.toml` if you would like to 110 | //! experiment with these features. Any APIs protected behind these feature 111 | //! flags are subject to change, but the need for these flags will be removed as 112 | //! they are stabilized. 113 | //! 114 | //! [Hunspell]: http://hunspell.github.io/ 115 | //! [CLI docs]: https://pluots.github.io/zspell/ 116 | #![forbid(unsafe_code)] 117 | #![warn(clippy::pedantic)] 118 | // #![warn(clippy::cargo)] 119 | #![warn(clippy::nursery)] 120 | #![warn(clippy::str_to_string)] 121 | #![warn(clippy::missing_inline_in_public_items)] 122 | #![warn(clippy::disallowed_types)] 123 | #![allow(clippy::use_self)] 124 | #![allow(clippy::match_same_arms)] 125 | #![allow(clippy::struct_excessive_bools)] 126 | #![allow(clippy::missing_panics_doc)] 127 | #![allow(clippy::must_use_candidate)] 128 | // #![allow(clippy::redundant_pub_crate)] 129 | #![allow(clippy::module_name_repetitions)] 130 | #![allow(clippy::missing_const_for_fn)] 131 | #![allow(clippy::derive_partial_eq_without_eq)] 132 | 133 | mod affix; 134 | mod dict; 135 | pub mod error; 136 | mod helpers; 137 | mod meta; 138 | mod morph; 139 | mod suggestions; 140 | 141 | #[cfg(feature = "unstable-system")] 142 | pub mod system; 143 | 144 | pub(crate) use affix::ParsedCfg; 145 | pub use affix::PartOfSpeech; 146 | #[doc(inline)] 147 | pub use dict::{DictBuilder, Dictionary, WordEntry, WordList}; 148 | #[doc(inline)] 149 | pub use error::Error; 150 | pub use morph::{MorphInfo, MorphStr}; 151 | 152 
| // Make some things public when benchmarking 153 | #[cfg(feature = "unstable-bench")] 154 | pub mod bench { 155 | pub use super::affix::{affix_from_str, FlagType}; 156 | pub use super::dict::DictEntry; 157 | } 158 | 159 | /// Create a new [`DictBuilder`] instance (shortcut for [`DictBuilder::new`]) 160 | #[inline] 161 | pub fn builder<'a>() -> DictBuilder<'a> { 162 | DictBuilder::new() 163 | } 164 | -------------------------------------------------------------------------------- /zspell/src/meta.rs: -------------------------------------------------------------------------------- 1 | //! Meta-related logic 2 | -------------------------------------------------------------------------------- /zspell/src/morph.rs: -------------------------------------------------------------------------------- 1 | //! Types and implementation of morphological analysis 2 | 3 | use std::fmt; 4 | 5 | use crate::affix::PartOfSpeech; 6 | 7 | /// Morphological information about a word, used by analysis methods 8 | #[non_exhaustive] 9 | #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] 10 | pub enum MorphInfo { 11 | /// `st:` stem word 12 | Stem(MorphStr), 13 | /// `ph:` better phonetic transliteration if available 14 | Phonetic(MorphStr), 15 | /// `al:` allomorphs (e.g. sing -> sang, sung) 16 | Allomorph(MorphStr), 17 | /// `po:` part of speech 18 | Part(PartOfSpeech), 19 | /// `ds:` derivational suffix 20 | DerivSfx(MorphStr), 21 | /// `is:` inflectional suffix 22 | InflecSfx(MorphStr), 23 | /// `ts:` terminal suffix 24 | TerminalSfx(MorphStr), 25 | /// `dp:` derivational suffix 26 | DerivPfx(MorphStr), 27 | /// `ip:` inflectional suffix 28 | InflecPfx(MorphStr), 29 | /// `tp:` terminal suffix 30 | TermPfx(MorphStr), 31 | /// `sp:` surface prefix 32 | SurfacePfx(MorphStr), 33 | /// `pa:` parts of compound words 34 | CompPart(MorphStr), 35 | /// Any unrecognized tag. This will be stored as written (e.g. `foo:bar` is stored as 36 | /// `foo:bar`, not just `bar`). 
37 | Other(MorphStr), 38 | } 39 | 40 | impl MorphInfo { 41 | /// Parse the kind of string that a dictionary file has, usually something like: 42 | /// 43 | /// ```text 44 | /// po:verb st:rootword ts:abcd 45 | /// ``` 46 | #[inline] 47 | #[allow(clippy::unnecessary_wraps)] 48 | pub(crate) fn many_from_str(s: &str) -> impl Iterator + '_ { 49 | s.split_whitespace().map(MorphInfo::from) 50 | // FIXME:dict-parser we should be able to handle the hungarian dictionary that 51 | // has entries like this: 52 | // üzletág/UmôŇyiYcÇ üzletágak 53 | // but I am not sure what that means if it is not morph info... 54 | // res.push(MorphInfo::try_from(morph).map_err(|e| ParseError::new_nospan(e, morph))?); 55 | } 56 | } 57 | 58 | impl From<&str> for MorphInfo { 59 | #[inline] 60 | fn from(value: &str) -> Self { 61 | let Some((tag, val)) = value.split_once(':') else { 62 | return Self::Other(value.into()); 63 | }; 64 | 65 | match tag { 66 | "st" => Self::Stem(val.into()), 67 | "ph" => Self::Phonetic(val.into()), 68 | "al" => Self::Allomorph(val.into()), 69 | "po" => Self::Part(val.into()), 70 | "ds" => Self::DerivSfx(val.into()), 71 | "is" => Self::InflecSfx(val.into()), 72 | "ts" => Self::TerminalSfx(val.into()), 73 | "dp" => Self::DerivPfx(val.into()), 74 | "ip" => Self::InflecPfx(val.into()), 75 | "tp" => Self::TermPfx(val.into()), 76 | "sp" => Self::SurfacePfx(val.into()), 77 | "pa" => Self::CompPart(val.into()), 78 | _ => Self::Other(value.into()), 79 | } 80 | } 81 | } 82 | 83 | impl fmt::Display for MorphInfo { 84 | #[inline] 85 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 86 | match self { 87 | MorphInfo::Stem(v) => write!(f, "st:{v}"), 88 | MorphInfo::Phonetic(v) => write!(f, "ph:{v}"), 89 | MorphInfo::Allomorph(v) => write!(f, "al:{v}"), 90 | MorphInfo::Part(v) => write!(f, "po:{v}"), 91 | MorphInfo::DerivSfx(v) => write!(f, "ds:{v}"), 92 | MorphInfo::InflecSfx(v) => write!(f, "is:{v}"), 93 | MorphInfo::TerminalSfx(v) => write!(f, "ts:{v}"), 94 | 
MorphInfo::DerivPfx(v) => write!(f, "dp:{v}"), 95 | MorphInfo::InflecPfx(v) => write!(f, "ip:{v}"), 96 | MorphInfo::TermPfx(v) => write!(f, "tp:{v}"), 97 | MorphInfo::SurfacePfx(v) => write!(f, "sp:{v}"), 98 | MorphInfo::CompPart(v) => write!(f, "pa:{v}"), 99 | MorphInfo::Other(v) => write!(f, "{v}"), 100 | } 101 | } 102 | } 103 | 104 | /// A string used as part of morphological analysis 105 | /// 106 | /// This is a thin wrapper over a native string type to allow us to change 107 | /// the implementation as needed. 108 | #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] 109 | pub struct MorphStr(Box); 110 | 111 | impl AsRef for MorphStr { 112 | #[inline] 113 | fn as_ref(&self) -> &str { 114 | self.0.as_ref() 115 | } 116 | } 117 | 118 | impl From<&str> for MorphStr { 119 | #[inline] 120 | fn from(value: &str) -> Self { 121 | Self(value.into()) 122 | } 123 | } 124 | 125 | impl fmt::Display for MorphStr { 126 | #[inline] 127 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 128 | self.0.fmt(f) 129 | } 130 | } 131 | 132 | impl fmt::Debug for MorphStr { 133 | #[inline] 134 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 135 | self.0.fmt(f) 136 | } 137 | } 138 | 139 | #[cfg(test)] 140 | mod tests { 141 | use super::*; 142 | 143 | #[test] 144 | fn morph_single_ok() { 145 | let tests = [ 146 | ("st:stem", MorphInfo::Stem("stem".into())), 147 | ("ip:abc", MorphInfo::InflecPfx("abc".into())), 148 | ("pa:xyz", MorphInfo::CompPart("xyz".into())), 149 | ("foo:xyz", MorphInfo::Other("foo:xyz".into())), 150 | ]; 151 | 152 | for (input, expected) in tests { 153 | assert_eq!(MorphInfo::from(input), expected, "failure parsing {input}"); 154 | } 155 | } 156 | 157 | #[test] 158 | fn morph_string_ok() { 159 | let input = "st:stem ip:abcd pa:xyz st:some-stem\tal:def"; 160 | let output = MorphInfo::many_from_str(input); 161 | let expected = [ 162 | MorphInfo::Stem("stem".into()), 163 | MorphInfo::InflecPfx("abcd".into()), 164 | 
MorphInfo::CompPart("xyz".into()), 165 | MorphInfo::Stem("some-stem".into()), 166 | MorphInfo::Allomorph("def".into()), 167 | ]; 168 | 169 | assert_eq!(&output.collect::>(), &expected); 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /zspell/src/suggestions.rs: -------------------------------------------------------------------------------- 1 | //! Types and implementation of suggestion logic 2 | -------------------------------------------------------------------------------- /zspell/src/system/tests.rs: -------------------------------------------------------------------------------- 1 | //! Tests for the `system` module 2 | 3 | // use std::{fs, io}; 4 | 5 | // use tempfile::tempdir; 6 | 7 | // use super::*; 8 | // use crate::errors; 9 | 10 | // #[test] 11 | // #[cfg(windows)] 12 | // fn test_raw_paths() { 13 | // // Just spot check what we have here 14 | // let paths = create_raw_paths(); 15 | 16 | // assert!(paths.contains(&PathBuf::from( 17 | // r"C:\Program files\OpenOffice.org*\share\dict\ooo" 18 | // ))); 19 | // assert!(paths.contains(&PathBuf::from( 20 | // r"C:\Program files\OpenOffice.org*\share\dict\ooo\hunspell" 21 | // ))); 22 | // } 23 | 24 | // #[test] 25 | // #[cfg(not(windows))] 26 | // fn test_raw_paths() { 27 | // // Just spot check what we have here 28 | // let paths = create_raw_paths(); 29 | 30 | // assert!(paths.contains(&PathBuf::from("/usr/share"))); 31 | // assert!(paths.contains(&PathBuf::from("/usr/share/zspell"))); 32 | // assert!(paths.contains(&PathBuf::from("/usr/share/myspell"))); 33 | // assert!(paths.contains(&PathBuf::from("/usr/share/hunspell"))); 34 | // assert!(paths.contains(&PathBuf::from("/Library/Spelling/hunspell"))); 35 | // assert!(paths.contains(&PathBuf::from("/Library/Spelling/hunspell"))); 36 | // } 37 | 38 | // #[test] 39 | // fn test_matching_dirs() { 40 | // // Create a temporary directory with contents 41 | // // Ensure the function locates them using 
wildcards 42 | // let dir = tempdir().unwrap(); 43 | 44 | // let mut paths = vec![ 45 | // dir.path().join("a").join("b").join("c-x-cxd"), 46 | // dir.path().join("a").join("b").join("c-yz-cxd"), 47 | // dir.path().join("a").join("b").join("c-.abc-cxd"), 48 | // ]; 49 | // paths.sort(); 50 | 51 | // for path in &paths { 52 | // fs::create_dir_all(path).unwrap(); 53 | // } 54 | 55 | // let mut ret = find_matching_dirs(&dir.path().join("a").join("b"), "c-*-c?d"); 56 | // ret.sort(); 57 | 58 | // assert_eq!(paths, ret); 59 | // } 60 | 61 | // #[test] 62 | // fn test_expand_dir_wildcards() { 63 | // let dir = tempdir().unwrap(); 64 | 65 | // let paths = vec![ 66 | // dir.path().join("aaa").join("bbb-x").join("ccc"), 67 | // dir.path().join("aaa").join("bbb-y").join("ccc"), 68 | // dir.path().join("ddd"), 69 | // ]; 70 | 71 | // for path in &paths { 72 | // fs::create_dir_all(path).unwrap(); 73 | // } 74 | 75 | // let mut input = vec![ 76 | // dir.path().join("aaa").join("bbb*").join("ccc"), 77 | // dir.path().join("ddd"), 78 | // ]; 79 | 80 | // let mut expanded = Vec::from_iter(expand_dir_wildcards(&mut input)); 81 | // expanded.sort_unstable(); 82 | 83 | // assert_eq!(paths, expanded); 84 | // } 85 | 86 | // #[test] 87 | // fn test_find_dict_from_path() { 88 | // let dir = tempdir().unwrap(); 89 | 90 | // let fnames = vec![ 91 | // dir.path().join("test_found.dic"), 92 | // dir.path().join("test_found.aff"), 93 | // dir.path().join("test_found.afx"), 94 | // dir.path().join("test.dict"), 95 | // dir.path().join("test.affix"), 96 | // dir.path().join("notfound.dic"), 97 | // dir.path().join("notfound.aff"), 98 | // dir.path().join("test"), 99 | // ]; 100 | 101 | // let mut expected = vec![ 102 | // DictPaths { 103 | // dictionary: fnames[0].clone(), 104 | // affix: fnames[1].clone(), 105 | // }, 106 | // DictPaths { 107 | // dictionary: fnames[0].clone(), 108 | // affix: fnames[2].clone(), 109 | // }, 110 | // DictPaths { 111 | // dictionary: fnames[3].clone(), 112 | 
// affix: fnames[4].clone(), 113 | // }, 114 | // ]; 115 | // expected.sort(); 116 | 117 | // for fname in fnames { 118 | // fs::File::create(fname).unwrap(); 119 | // } 120 | // fs::read_dir(dir.path()).unwrap(); 121 | 122 | // let mut res = find_dicts_from_path(dir.path(), "test_found").unwrap(); 123 | // res.sort(); 124 | 125 | // assert_eq!(res, expected); 126 | // } 127 | 128 | // #[test] 129 | // fn test_find_dict_from_path_err() { 130 | // let fakepath = tempdir().unwrap().path().join("fake"); 131 | // let res = find_dicts_from_path(&fakepath, "test_found"); 132 | 133 | // assert_eq!( 134 | // Err(errors::SystemError::IOError { 135 | // name: fakepath.to_string_lossy().to_string(), 136 | // e: io::ErrorKind::NotFound 137 | // }), 138 | // res 139 | // ); 140 | // } 141 | -------------------------------------------------------------------------------- /zspell/test-suite/0-example.test: -------------------------------------------------------------------------------- 1 | %% Example test file. '%%' is our comment indicator since '#' gets passed directly 2 | %% to the input 3 | 4 | ==== afx ==== 5 | %% This section contains contents of the .aff file 6 | 7 | ==== dic ==== 8 | %% This section contains contents of the .dic file 9 | 10 | === personal === 11 | %% This section contains contents of a personal file 12 | 13 | ==== valid ==== 14 | %% Each line will get checked with `.check()`, i.e. treated as sentences 15 | 16 | ==== invalid ==== 17 | %% These words should not be valid 18 | 19 | ==== wordlist ==== 20 | %% Verify the word list contains exactly the listed words 21 | 22 | ==== nosuggest ==== 23 | %% Exact expected contents of the never suggested wordlist 24 | 25 | ==== forbidden ==== 26 | %% Exact expected contents of the non-accepted wordlist 27 | 28 | ==== stem ==== 29 | %% List the expected stem for a given word 30 | rooted > root | rooted 31 | 32 | ==== morph ==== 33 | %% List the expected morph info for a given word 34 | apple > po:noun | ... 
35 | 36 | ==== suggest ==== 37 | %% Something like the following 38 | appl > apple | Apfel | app 39 | -------------------------------------------------------------------------------- /zspell/test-suite/b-affix-forward-gen-num-flags.test: -------------------------------------------------------------------------------- 1 | %% Test numeric flags 2 | 3 | ==== afx ==== 4 | SET UTF-8 5 | FLAG num 6 | 7 | SFX 1 N 1 8 | SFX 1 0 aa . 9 | 10 | SFX 999 N 1 11 | SFX 999 0 bb . 12 | 13 | SFX 12345 N 1 14 | SFX 12345 0 cc . 15 | 16 | SFX 1234 N 1 17 | SFX 1234 0 dd . 18 | 19 | ==== dic ==== 20 | 4 21 | www/1 22 | xxx/1,999,12345 23 | yyy/999,12345 24 | zzz/999,1234 25 | 26 | 27 | ==== valid ==== 28 | www 29 | xxx 30 | yyy 31 | zzz 32 | wwwaa 33 | xxxaa 34 | xxxbb 35 | xxxcc 36 | yyybb 37 | yyycc 38 | zzzbb 39 | zzzdd 40 | 41 | 42 | ==== invalid ==== 43 | %% Nothing to see here 44 | nothing 45 | 46 | ==== wordlist ==== 47 | www 48 | xxx 49 | yyy 50 | zzz 51 | wwwaa 52 | xxxaa 53 | xxxbb 54 | xxxcc 55 | yyybb 56 | yyycc 57 | zzzbb 58 | zzzdd 59 | -------------------------------------------------------------------------------- /zspell/test-suite/b-affix-forward-gen.test: -------------------------------------------------------------------------------- 1 | %% Test basic affix, including: 2 | %% - Only prefix 3 | %% - Only suffix 4 | %% - Stripping characters 5 | %% - Patterns 6 | %% - Combined prefix and suffix 7 | %% - Noncombining prefix and suffix 8 | 9 | ==== afx ==== 10 | SET UTF-8 11 | 12 | PFX A Y 1 13 | PFX A 0 aa . 14 | 15 | SFX B Y 3 16 | SFX B 0 bb . 17 | SFX B y cc y 18 | SFX B 0 dd [^y] 19 | 20 | PFX C N 2 21 | PFX C yy ee . 22 | PFX C 0 ff . 
23 | 24 | 25 | ==== dic ==== 26 | 4 27 | xxx/A 28 | yyy/B 29 | zzz/AB 30 | yyyy/AC 31 | 32 | 33 | ==== valid ==== 34 | xxx 35 | yyy 36 | zzz 37 | yyyy 38 | aaxxx 39 | yyybb 40 | yycc 41 | aazzz 42 | zzzbb 43 | zzzdd 44 | aazzzbb 45 | aazzzdd 46 | aayyyy 47 | eeyy 48 | ffyyyy 49 | 50 | 51 | ==== invalid ==== 52 | %% Nothing to see here 53 | nothing 54 | 55 | ==== wordlist ==== 56 | xxx 57 | yyy 58 | zzz 59 | yyyy 60 | aaxxx 61 | yyybb 62 | yycc 63 | aazzz 64 | zzzbb 65 | zzzdd 66 | aazzzbb 67 | aazzzdd 68 | aayyyy 69 | eeyy 70 | ffyyyy 71 | -------------------------------------------------------------------------------- /zspell/test-suite/b-flag-long.test: 1 | %% Verify that multicharacter flags work 2 | 3 | ==== afx ==== 4 | FLAG long 5 | 6 | NEEDAFFIX () 7 | FORBIDDENWORD {} 8 | KEEPCASE || 9 | NOSUGGEST -- 10 | 11 | %% Test same first character but different second 12 | SFX -+ Y 1 13 | SFX -+ 0 aa . 14 | 15 | ==== dic ==== 16 | foo/-- 17 | bar/||-- 18 | baz/-+ 19 | 20 | ==== valid ==== 21 | foo bar baz bazaa 22 | 23 | ==== wordlist ==== 24 | baz 25 | bazaa 26 | 27 | ==== nosuggest ==== 28 | foo 29 | bar 30 | -------------------------------------------------------------------------------- /zspell/test-suite/b-nosuggest-forbid.test: 1 | %% Verify our nosuggest and forbid flags work 2 | 3 | ==== afx ==== 4 | NOSUGGEST ! 5 | FORBIDDENWORD * 6 | 7 | SFX A Y 1 8 | SFX A 0 aaa . 9 | 10 | ==== dic ==== 11 | nosuggest/A! 12 | forbid/A* 13 | nosuggest2/!
14 | forbid2/* 15 | ok 16 | 17 | ==== valid ==== 18 | nosuggest 19 | nosuggestaaa 20 | nosuggest2 21 | ok 22 | 23 | ==== invalid ==== 24 | forbid 25 | forbidaaa 26 | forbid2 27 | 28 | ==== wordlist ==== 29 | ok 30 | 31 | ==== nosuggest ==== 32 | nosuggest 33 | nosuggestaaa 34 | nosuggest2 35 | 36 | ==== forbidden ==== 37 | forbid 38 | forbidaaa 39 | forbid2 40 | -------------------------------------------------------------------------------- /zspell/test-suite/b-stemming-morph.test: -------------------------------------------------------------------------------- 1 | ==== afx ==== 2 | SFX X Y 1 3 | SFX X 0 able . ds:able 4 | 5 | ==== dic ==== 6 | %% mice st:mouse 7 | drink/X po:verb 8 | 9 | ==== wordlist ==== 10 | %% attr: allow-extra 11 | %% mice 12 | 13 | ==== stem ==== 14 | %% mice > mouse | miced 15 | drink > drink 16 | 17 | %% FIXME: hunspell lists these as `> drinkable`, not `> drink`. Why? 18 | drinkable > drink 19 | Drinkable > drink 20 | 21 | ==== morph ==== 22 | drink > po:verb 23 | drinkable > po:verb ds:able 24 | -------------------------------------------------------------------------------- /zspell/test-suite/h-circumfix.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/3cfd539b5b1033620b12663ee3f1d673d193add0/tests/circumfix.aff 3 | 4 | ==== afx ==== 5 | # circumfixes: ~ obligate prefix/suffix combinations 6 | # superlative in Hungarian: leg- (prefix) AND -bb (suffix) 7 | 8 | CIRCUMFIX X 9 | 10 | PFX A Y 1 11 | PFX A 0 leg/X . 12 | 13 | PFX B Y 1 14 | PFX B 0 legesleg/X . 15 | 16 | SFX C Y 3 17 | SFX C 0 obb . is:COMPARATIVE 18 | SFX C 0 obb/AX . is:SUPERLATIVE 19 | SFX C 0 obb/BX . 
is:SUPERSUPERLATIVE 20 | 21 | ==== dic ==== 22 | 1 23 | nagy/C po:adj 24 | 25 | ==== valid ==== 26 | nagy 27 | nagyobb 28 | 29 | %% FIXME(circumfix): xfail 30 | %% legnagyobb 31 | %% legeslegnagyobb 32 | 33 | 34 | ==== stem ==== 35 | nagy > nagy 36 | nagyobb > nagy 37 | 38 | %% FIXME(circumfix): xfail 39 | %% legnagyobb > nagy 40 | %% legeslegnagyobb > nagy 41 | 42 | ==== morph ==== 43 | nagy > po:adj 44 | nagyobb > po:adj is:COMPARATIVE 45 | 46 | %% FIXME(circumfix): xfail 47 | %% legnagyobb > fl:A po:adj is:SUPERLATIVE 48 | %% legeslegnagyobb > fl:B po:adj is:SUPERSUPERLATIVE 49 | -------------------------------------------------------------------------------- /zspell/test-suite/h-ignore-sug.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/ignoresug.aff 3 | 4 | ==== afx ==== 5 | # Ignore punctuation marks used inside of Armenian words, and produce correct 6 | # suggestion (see issue #570). However, "suggestion test" cannot be used because 7 | # words in `.wrong` file are not wrong realy, so it leads to an error. 8 | # Therefore, we apply "morphological analysis" which has a similar result. 
9 | 10 | SET UTF-8 11 | WORDCHARS ֊՛՜՞՚ 12 | IGNORE ֊՛՜՞՚ 13 | 14 | ==== dic ==== 15 | 3 16 | ինչ 17 | մնաս 18 | որտեղ 19 | 20 | ==== valid ==== 21 | %% FIXME:ignore 22 | ինչ 23 | %% ի՞նչ 24 | մնաս 25 | %% մնա՜ս 26 | որտեղ 27 | %% որտե՞ղ 28 | 29 | %% FIXME:morph 30 | %% ==== morph ==== 31 | %% > ինչ 32 | %% analyze(ինչ) = st:ինչ 33 | %% stem(ինչ) = ինչ 34 | %% > ի՞նչ 35 | %% analyze(ի՞նչ) = st:ինչ 36 | %% stem(ի՞նչ) = ինչ 37 | %% > մնաս 38 | %% analyze(մնաս) = st:մնաս 39 | %% stem(մնաս) = մնաս 40 | %% > մնա՜ս 41 | %% analyze(մնա՜ս) = st:մնաս 42 | %% stem(մնա՜ս) = մնաս 43 | %% > որտեղ 44 | %% analyze(որտեղ) = st:որտեղ 45 | %% stem(որտեղ) = որտեղ 46 | %% > որտե՞ղ 47 | %% analyze(որտե՞ղ) = st:որտեղ 48 | %% stem(որտե՞ղ) = որտեղ 49 | -------------------------------------------------------------------------------- /zspell/test-suite/h-ignore-utf.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/ignoreutf.aff 3 | 4 | ==== afx ==== 5 | # Arabic test for feature ignoring diacritics 6 | SET UTF-8 7 | # Arabic diacritics (harakat): 8 | # sukun, shadda, kasra, damma, fatha, kasratan, dammantan, fathatan (left to right) 9 | IGNORE ًٌٍَُِّْ 10 | WORDCHARS ًٌٍَُِّْ 11 | 12 | ==== dic ==== 13 | 9 14 | طِير 15 | فَتحة 16 | ضُمة 17 | كِسرة 18 | فتحًتان 19 | ضمتانٌ 20 | كسرتاٍن 21 | شدّة 22 | سكوْن 23 | 24 | %% FIXME:ignore 25 | %% ==== valid ==== 26 | %% طير 27 | %% فتحة 28 | %% ضمة 29 | %% كسرة 30 | %% فتحتان 31 | %% ضمتان 32 | %% كسرتان 33 | %% شدة 34 | %% سكون 35 | -------------------------------------------------------------------------------- /zspell/test-suite/h-keepcase.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/keepcase.aff 3 | 
4 | ==== afx ==== 5 | # keep case in signed words 6 | KEEPCASE A 7 | WORDCHARS . 8 | 9 | ==== dic ==== 10 | 4 11 | foo/A 12 | Bar/A 13 | baz./A 14 | Quux./A 15 | 16 | ==== valid ==== 17 | foo 18 | Bar 19 | %% FIXME:unknown 20 | %% baz. 21 | %% Quux. 22 | 23 | ==== invalid ==== 24 | %% FIXME:keepcase 25 | %% Foo 26 | %% FOO 27 | BAR 28 | bar 29 | Baz. 30 | BAZ. 31 | quux. 32 | QUUX. 33 | 34 | %% FIXME:suggestions 35 | %% ==== suggest ==== 36 | %% Foo > foo 37 | %% FOO > foo 38 | %% BAR > Bar 39 | %% bar > Bar, baz. 40 | %% Baz. > baz. 41 | %% BAZ. > baz. 42 | %% quux. > Quux. 43 | %% QUUX. > Quux. 44 | -------------------------------------------------------------------------------- /zspell/test-suite/h-korean.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/korean.aff 3 | 4 | ==== afx ==== 5 | SET UTF-8 6 | 7 | ==== dic ==== 8 | 3 9 | 들어오세요 10 | 안녕하세요 11 | 김수한무거북이와두루미삼천갑자동방삭치치카포사리사리세ᅡ워리워리세브리캉무드셀ᅡ구름위허ᅵ케ᅵᆫᅦ담벼락서생원에ᄀ양 12 | 13 | ==== valid ==== 14 | 들어오세요 15 | 안녕하세요 16 | 17 | ==== invalid ==== 18 | 들어오세 19 | -------------------------------------------------------------------------------- /zspell/test-suite/h-limit-multiple-compounding.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/limit-multiple-compounding.aff 3 | 4 | ==== afx ==== 5 | # "foo+bar" accepted, but not "foo+bar+baz" 6 | # because 3-or-more-word compounds got a typo check, i.e. 
7 | # "foobarbaz" is rejected, because it is a typo of the dictionary word "goobarbaz" 8 | # (but not "foobar" as typo of the dictionary word "goobar") 9 | TRY esianrtolcdugmphbyfvkwz' 10 | COMPOUNDFLAG x 11 | 12 | ==== dic ==== 13 | 3 14 | foo/x 15 | bar/x 16 | baz/x 17 | goobar 18 | goobarbaz 19 | 20 | %% FIXME:compound 21 | %% ==== valid ==== 22 | %% foobar 23 | %% foobaz 24 | %% barfoo 25 | %% bazfoo 26 | %% barbaz 27 | %% bazbar 28 | %% foobazbar 29 | %% barfoobaz 30 | %% bazfoobar 31 | %% barbazfoo 32 | %% bazbarfoo 33 | 34 | ==== invalid ==== 35 | foobarbaz 36 | -------------------------------------------------------------------------------- /zspell/test-suite/h-map-utf.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/maputf.aff 3 | 4 | ==== afx ==== 5 | # With MAP suggestion, Hunspell can add missing accents to a word. 
6 | 7 | SET UTF-8 8 | 9 | # switch off ngram suggestion for testing 10 | MAXNGRAMSUGS 0 11 | 12 | MAP 3 13 | MAP uúü 14 | MAP öóo 15 | MAP ß(ss) 16 | 17 | ==== dic ==== 18 | 3 19 | Frühstück 20 | tükörfúró 21 | groß 22 | 23 | ==== valid ==== 24 | 25 | ==== invalid ==== 26 | Fruhstuck 27 | tukorfuro 28 | gross 29 | 30 | %% FIXME:suggestions 31 | %% ==== suggest ==== 32 | %% Fruhstuck > Frühstück 33 | %% tukorfuro > tükörfúró 34 | %% gross > groß 35 | -------------------------------------------------------------------------------- /zspell/test-suite/h-map.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/map.aff 3 | 4 | %% skipped because we really have no reason to not support utf8 5 | -------------------------------------------------------------------------------- /zspell/test-suite/h-morph.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/morph.aff 3 | 4 | ==== afx ==== 5 | # example for morphological analysis, stemming and generation 6 | PFX P Y 1 7 | PFX P 0 un . dp:pfx_un sp:un 8 | 9 | SFX S Y 1 10 | SFX S 0 s . is:plur 11 | 12 | SFX Q Y 1 13 | SFX Q 0 s . is:sg_3 14 | 15 | SFX R Y 1 16 | SFX R 0 able/PS . ds:der_able 17 | 18 | ==== dic ==== 19 | 9 20 | drink/S po:noun 21 | drink/RQ po:verb al:drank al:drunk ts:present 22 | drank po:verb st:drink is:past_1 23 | drunk po:verb st:drink is:past_2 24 | eat/RQ po:verb al:ate al:eaten ts:present 25 | ate po:verb st:eat is:past_1 26 | eaten po:verb st:eat is:past_2 27 | phenomenon po:noun al:phenomena 28 | phenomena po:noun st:phenomenon is:plur 29 | 30 | ==== valid ==== 31 | %% FIXME:duplicates should these double words be tested as one or two? 
32 | drink 33 | drinks 34 | %% drinkable 35 | %% drinkables 36 | %% undrinkable 37 | %% undrinkables 38 | drank 39 | drunk 40 | phenomenon 41 | phenomena 42 | drink eat 43 | drink eats 44 | drink ate 45 | drink eaten 46 | %% drink eatable 47 | %% drink eatables 48 | drink phenomena 49 | drinks eat 50 | drinks eats 51 | drinks ate 52 | drinks eaten 53 | %% drinks eatable 54 | %% drinks eatables 55 | drinks phenomena 56 | %% undrinkable phenomena 57 | phenomenon drinks 58 | 59 | ==== stem ==== 60 | %% FIXME:stemming 61 | drink > drink 62 | drinks > drink 63 | %% drinkable > drinkable 64 | %% drinkables > drinkable 65 | %% undrinkable > undrinkable 66 | %% undrinkables > undrinkable 67 | drank > drink 68 | drunk > drink 69 | phenomenon > phenomenon 70 | phenomena > phenomenon 71 | 72 | %% ==== morph ==== 73 | %% FIXME:morph. We should probably be storing these duplicates as separate meta entries (vec in our map) 74 | %% drink > st:drink po:noun 75 | %% drink > st:drink po:verb al:drank al:drunk ts:present 76 | %% drinks > st:drink po:verb al:drank al:drunk ts:present is:sg_3 77 | %% drinks > st:drink po:noun is:plur 78 | %% drinkable > st:drink po:verb al:drank al:drunk ts:present ds:der_able 79 | %% drinkables > st:drink po:verb al:drank al:drunk ts:present ds:der_able is:plur 80 | %% undrinkable > dp:pfx_un sp:un st:drink po:verb al:drank al:drunk ts:present ds:der_able 81 | %% undrinkables > dp:pfx_un sp:un st:drink po:verb al:drank al:drunk ts:present ds:der_able is:plur 82 | %% drank > po:verb st:drink is:past_1 83 | %% drunk > po:verb st:drink is:past_2 84 | %% phenomenon > st:phenomenon po:noun al:phenomena 85 | %% phenomena > po:noun st:phenomenon is:plur 86 | 87 | %% ==== gen ==== 88 | %% generate(drink, eat) = drink 89 | %% generate(drink, eats) = drinks 90 | %% generate(drink, ate) = drank 91 | %% generate(drink, eaten) = drunk 92 | %% generate(drink, eatable) = drinkable 93 | %% generate(drink, eatables) = drinkables 94 | %% generate(drink, phenomena) = 
drinks 95 | %% generate(drinks, eat) = drink 96 | %% generate(drinks, eats) = drinks 97 | %% generate(drinks, ate) = drank 98 | %% generate(drinks, eaten) = drunk 99 | %% generate(drinks, eatable) = drinkable 100 | %% generate(drinks, eatables) = drinkables 101 | %% generate(drinks, phenomena) = drinks 102 | %% generate(undrinkable, phenomena) = undrinkables 103 | %% generate(phenomenon, drinks) = phenomena 104 | -------------------------------------------------------------------------------- /zspell/test-suite/h-needaffix.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/needaffix.aff 3 | 4 | ==== afx ==== 5 | NEEDAFFIX X 6 | COMPOUNDFLAG Y 7 | 8 | SFX A Y 1 9 | SFX A 0 s/Y . 10 | 11 | ==== dic ==== 12 | 2 13 | foo/YXA 14 | bar/Y 15 | 16 | ==== valid ==== 17 | bar 18 | %% FIXME:compound 19 | %% foos 20 | %% barfoos 21 | 22 | ==== invalid ==== 23 | foo 24 | -------------------------------------------------------------------------------- /zspell/test-suite/h-needaffix2.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/needaffix2.aff 3 | 4 | ==== afx ==== 5 | NEEDAFFIX X 6 | COMPOUNDFLAG Y 7 | 8 | ==== dic ==== 9 | 4 10 | foo st:foo id:1 11 | foo/YX st:foo id:2 12 | foo/Y st:foo id:3 13 | bar/Y 14 | 15 | ==== valid ==== 16 | foo 17 | bar 18 | 19 | %% FIXME:compound 20 | %% foobar 21 | %% barfoo 22 | 23 | ==== stem ==== 24 | foo > foo 25 | bar > bar 26 | 27 | %% FIXME:compound 28 | %% foobar > foo 29 | %% barfoo > barfoo 30 | 31 | %% ==== morph ==== 32 | %% foo > st:foo id:1 33 | %% foo > st:foo id:3 34 | %% FIXME:morph:show-stem: seems like morph should always show a `st` entry? 
35 | %% bar > st:bar 36 | 37 | %% FIXME:compound 38 | %% foobar > pa:foo st:foo id:3 pa:bar 39 | %% barfoo > pa:bar st:bar pa:foo st:foo id:3 40 | -------------------------------------------------------------------------------- /zspell/test-suite/h-needaffix3.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/needaffix3.aff 3 | 4 | ==== afx ==== 5 | # needaffix on affixes 6 | NEEDAFFIX X 7 | 8 | SFX A Y 1 9 | SFX A 0 s/XB . 10 | 11 | SFX B Y 1 12 | SFX B 0 baz . 13 | 14 | ==== dic ==== 15 | 2 16 | foo/A 17 | 18 | ==== valid ==== 19 | foo 20 | %% FIXME:unknown 21 | %% foosbaz 22 | 23 | ==== invalid ==== 24 | foos 25 | -------------------------------------------------------------------------------- /zspell/test-suite/h-needaffix4.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/needaffix4.aff 3 | 4 | ==== afx ==== 5 | NEEDAFFIX X 6 | COMPOUNDFLAG Y 7 | 8 | ==== dic ==== 9 | 4 10 | foo/X [1] 11 | foo/Y [2] 12 | foo/YX [3] 13 | bar/Y [4] 14 | 15 | ==== valid ==== 16 | foo 17 | bar 18 | %% FIXME:compound 19 | %% foobar 20 | %% barfoo 21 | -------------------------------------------------------------------------------- /zspell/test-suite/h-needaffix5.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/needaffix5.aff 3 | 4 | ==== afx ==== 5 | # on affixes 6 | NEEDAFFIX X 7 | 8 | SFX A Y 2 9 | SFX A 0 suf/B . 10 | SFX A 0 pseudosuf/XB . 11 | 12 | SFX B Y 1 13 | SFX B 0 bar . 14 | 15 | PFX C Y 2 16 | PFX C 0 pre . 17 | PFX C 0 pseudopre/X . 
18 | 19 | ==== dic ==== 20 | 1 21 | foo/AC 22 | 23 | ==== valid ==== 24 | foo 25 | prefoo 26 | %% FIXME:unknown 27 | %% foosuf 28 | %% prefoosuf 29 | %% foosufbar 30 | %% prefoosufbar 31 | %% pseudoprefoosuf 32 | %% pseudoprefoosufbar 33 | %% pseudoprefoopseudosufbar 34 | %% prefoopseudosuf 35 | %% prefoopseudosufbar 36 | 37 | ==== invalid ==== 38 | pseudoprefoo 39 | foopseudosuf 40 | pseudoprefoopseudosuf 41 | -------------------------------------------------------------------------------- /zspell/test-suite/h-nepali.test: -------------------------------------------------------------------------------- 1 | ==== afx ==== 2 | SET UTF-8 3 | IGNORE ￰ 4 | WORDCHARS ःािीॉॊोौॎॏॕॖॗ‌‍ 5 | 6 | 7 | ICONV 5 8 | ICONV ‌_ ‌ 9 | ICONV र्‌य र्‌य 10 | ICONV र्‌व र्‌व 11 | ICONV ‌ ￰ 12 | ICONV ‍_ ￰ 13 | 14 | 15 | ==== dic ==== 16 | 4 17 | अलम् 18 | क्यार 19 | न्न 20 | र्‌य 21 | 22 | ==== valid ==== 23 | न्न 24 | %% FIXME:unknown 25 | %% न्‌न 26 | %% अलम्‍ 27 | र्‌य 28 | 29 | ==== invalid ==== 30 | र्य 31 | क्‍यार 32 | अलम्‌ 33 | -------------------------------------------------------------------------------- /zspell/test-suite/h-nosuggest.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/nosuggest.aff 3 | 4 | ==== afx ==== 5 | # don't suggest word with NOSUGGEST flag (for example vulgar or obscene words) 6 | # See OpenOffice.org Issue #55498 7 | # (nosuggest.sug is an empty file) 8 | NOSUGGEST A 9 | COMPOUNDFLAG B 10 | 11 | ==== dic ==== 12 | 1 13 | foo/AB 14 | bar/B 15 | 16 | ==== valid ==== 17 | foo 18 | %% FIXME:compound 19 | %% foobar 20 | %% barfoo 21 | 22 | ==== invalid ==== 23 | foox 24 | foobarx 25 | barfoox 26 | 27 | %% FIXME:suggestions 28 | %% ==== suggest ==== 29 | -------------------------------------------------------------------------------- /zspell/test-suite/h-oconv.test: 
-------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/oconv.aff 3 | 4 | ==== afx ==== 5 | # output conversion 6 | SET UTF-8 7 | 8 | # Testing also whitespace and comments. 9 | OCONV 7 # space, space 10 | OCONV a A # tab, space, space 11 | OCONV á Á # tab, tab, space 12 | OCONV b B # tab, tab, tab 13 | OCONV c C # 2xspace, 2xspace, 2xtab 14 | OCONV d D # tab+space, space+tab, space 15 | OCONV e E # 16 | OCONV é É 17 | # Only comment. Note that line above ends with space+tab. 18 | 19 | # space 20 | # 2xspace 21 | # tab 22 | # 2xtab 23 | # space+tab 24 | # tab+space 25 | 26 | ==== dic ==== 27 | 3 28 | bébé 29 | dádá 30 | aábcdeé 31 | 32 | ==== valid ==== 33 | bébé 34 | dádá 35 | 36 | ==== invalid ==== 37 | béb 38 | dád 39 | aábcde 40 | 41 | %% FIXME:suggestions 42 | %% ==== suggest ==== 43 | %% béb > BÉBÉ 44 | %% dád > DÁDÁ 45 | %% aábcde > AÁBCDEÉ 46 | -------------------------------------------------------------------------------- /zspell/test-suite/h-slash.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/slash.aff 3 | 4 | ==== afx ==== 5 | # slashes in words (\/) 6 | 7 | # (only for tokenization) 8 | WORDCHARS /: 9 | 10 | ==== dic ==== 11 | 4 12 | / 13 | 1\/2 14 | http:\/\/ 15 | \/usr\/share\/myspell\/ 16 | 17 | ==== valid ==== 18 | / 19 | %% FIXME:dict-parser 20 | %% 1/2 21 | %% http:// 22 | %% /usr/share/myspell/ 23 | -------------------------------------------------------------------------------- /zspell/test-suite/h-timelimit.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% 
https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/timelimit.aff 3 | 4 | ==== afx ==== 5 | WORDCHARS 01 6 | COMPOUNDMIN 1 7 | COMPOUNDFLAG Y 8 | 9 | ==== dic ==== 10 | 4 11 | 0/Y 12 | 00/Y 13 | 000/Y 14 | 1/Y 15 | 16 | %% FIXME:compound 17 | %% ==== valid ==== 18 | %% 1000000000000000000000 19 | -------------------------------------------------------------------------------- /zspell/test-suite/h-utf8.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/utf8.aff 3 | 4 | ==== afx ==== 5 | SET UTF-8 6 | 7 | SFX A Y 7 8 | SFX A 0 őő . 9 | SFX A 0 ő o 10 | SFX A 0 ő ó 11 | SFX A ó ő ó 12 | SFX A ó őoo ó 13 | SFX A o őo o 14 | SFX A 0 ó [abcdó] 15 | 16 | ==== dic ==== 17 | 2 18 | foo/A 19 | foó/A 20 | 21 | ==== valid ==== 22 | foo 23 | foó 24 | fooőő 25 | fooő 26 | foóő 27 | foő 28 | foőo 29 | foőoo 30 | foóó 31 | -------------------------------------------------------------------------------- /zspell/test-suite/h-utfcoumpound.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/utfcompound.aff 3 | 4 | %% FIXME:compound 5 | %% ==== afx ==== 6 | %% SET UTF-8 7 | %% COMPOUNDMIN 3 8 | %% COMPOUNDFLAG A 9 | %% 10 | %% ==== dic ==== 11 | %% 8 12 | %% foo/A 13 | %% bar/A 14 | %% fóó/A 15 | %% áár/A 16 | %% xy/A 17 | %% yz/A 18 | %% éé/A 19 | %% őő/A 20 | %% 21 | %% ==== valid ==== 22 | %% foobar 23 | %% barfoo 24 | %% foobarfoo 25 | %% fóóáár 26 | %% áárfóó 27 | %% 28 | %% ==== invalid ==== 29 | %% xyyz 30 | %% fooxy 31 | %% xyfoo 32 | %% fooxybar 33 | %% ééőő 34 | %% fóóéé 35 | %% őőáár 36 | -------------------------------------------------------------------------------- 
/zspell/test-suite/h-zeroaffix.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: MPL-1.1 2 | %% https://github.com/hunspell/hunspell/blob/fbf0ce7e4737084fe84e733b031634f4a2c7559f/tests/zeroaffix.aff 3 | 4 | ==== afx ==== 5 | PSEUDOROOT X 6 | COMPOUNDFLAG Y 7 | SFX A Y 1 8 | SFX A 0 0 . > 9 | SFX B Y 1 10 | SFX B 0 0 . > 11 | SFX C Y 2 12 | SFX C 0 0/XAB . 13 | SFX C 0 baz/XAB . 14 | 15 | ==== dic ==== 16 | 2 17 | foo/XA foo 29 | bar > bar 30 | 31 | %% FIXME: compound 32 | %% barbaz > bar 33 | 34 | %% ==== morph ==== 35 | %% > bar 36 | %% analyze(bar) = st:bar > 37 | %% analyze(bar) = st:bar 38 | %% analyze(bar) = st:bar > 39 | %% analyze(bar) = st:bar > 40 | %% stem(bar) = bar 41 | %% > foo 42 | %% analyze(foo) = st:foo 43 | %% stem(foo) = foo 44 | %% > barbaz 45 | %% analyze(barbaz) = st:bar > 46 | %% analyze(barbaz) = st:bar > 47 | %% stem(barbaz) = bar 48 | -------------------------------------------------------------------------------- /zspell/test-suite/i071-number-affixes.test: -------------------------------------------------------------------------------- 1 | %% Based on https://github.com/pluots/zspell/issues/71 2 | 3 | ==== afx ==== 4 | FLAG num 5 | SFX 10 Y 3 6 | SFX 10 0 0 . is:tens 7 | SFX 10 0 00 . is:hundreds 8 | SFX 10 0 000 . 
is:thousands 9 | 10 | ==== dic ==== 11 | 10 12 | 0 po:number 13 | 1/10 po:number 14 | 2/10 po:number 15 | 3/10 po:number 16 | 4/10 po:number 17 | 5/10 po:number 18 | 6/10 po:number 19 | 7/10 po:number 20 | 8/10 po:number 21 | 9/10 po:number 22 | 23 | ==== valid ==== 24 | 1 25 | 10 26 | 100 27 | 1000 28 | 9 29 | 90 30 | 900 31 | 32 | ==== morph ==== 33 | 9 > po:number 34 | 10 > po:number is:tens 35 | 600 > po:number is:hundreds 36 | -------------------------------------------------------------------------------- /zspell/test-suite/i093-separate-dict-afx-flags.test: -------------------------------------------------------------------------------- 1 | %% SPDX-License-Identifier: GPL-2.0-or-later 2 | %% Flags that apply to dictionary items and flags that apply to other things in 3 | %% the affix file have separate namespaces. 4 | %% 5 | %% Examples come from the German dictionary 6 | %% 7 | 8 | 9 | ==== afx ==== 10 | # Affix using `f` 11 | SFX f Y 4 12 | SFX f ph f ph 13 | SFX f ph fen ph 14 | SFX f phie fie phie 15 | SFX f phie fien phie 16 | 17 | # Meta flag using `f` 18 | # Affixes signed with CIRCUMFIX flag may be on a word when this word also has 19 | # a prefix with CIRCUMFIX flag and vice versa. 
20 | # for decapitalizing nouns with fogemorphemes 21 | CIRCUMFIX f 22 | 23 | # Affix that adds the flag `f` 24 | # for Uppercased end-words to prepend - and lowercase: (Tier/EPSm) (EX: Bettbezüge und *-laken*) 25 | # AND 26 | # for lowercased end-words to prepend - and re-uppercase : (tier/EPSozm) (EX: Arbeits*-Tier*) 27 | PFX m Y 1 28 | PFX m f -F/co f 29 | 30 | %% FIXME(added flags): affixes need to be able to add flags 31 | -------------------------------------------------------------------------------- /zspell/test-util/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "test-util" 3 | version = "0.5.5" 4 | edition = "2021" 5 | publish = false 6 | 7 | [dependencies] 8 | zspell = { path = "../", features = ["zspell-unstable"] } 9 | regex = "1.10" 10 | pretty_assertions = "1.4" 11 | -------------------------------------------------------------------------------- /zspell/tests/files/tortoise_hare_misspelled.txt: -------------------------------------------------------------------------------- 1 | A Hare was mking fun of the Tortoise one day for being so slow. 2 | 3 | Do you ever get anywhere? he asked with a mocking laugh. 4 | 5 | Yes, replied the Tortoise, and I get there sooner than you think. I'll 6 | run you a race and prove it. 7 | 8 | The Hare was much amused at the iea of running a race with the Tortise, 9 | but for the fun of the thing he agreed. So the Fox, who had consented to 10 | act as judge, maarked the distance and started the runners off. 11 | 12 | The Hare was soon far out of sight, and to make the Tortoise feel very 13 | deeply how ridiculous it was for him to try a race with a Hare, he lay 14 | down beside the course to take a nap until the Tortoise should catch up. 15 | 16 | The Tortoise meanwhile kept going sloly but steadily, and, after a time, 17 | passed the place where the Hare was sleeping. 
But the Hare slept on very 18 | peacefully; and when at last he did wake up, the Tortoise was near the goal. 19 | The Hare now ran his swiftest, but he could not overtaake the Tortoise 20 | in time. 21 | -------------------------------------------------------------------------------- /zspell/tests/files/w1_eng_short.aff: -------------------------------------------------------------------------------- 1 | # This is a shortened en_US affix file 2 | SET UTF-8 3 | TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ' 4 | ICONV 1 5 | ICONV ’ ' 6 | NOSUGGEST ! 7 | 8 | # ordinal numbers 9 | COMPOUNDMIN 1 10 | # only in compounds: 1th, 2th, 3th 11 | ONLYINCOMPOUND c 12 | # compound rules: 13 | # 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.) 14 | # 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.) 15 | COMPOUNDRULE 2 16 | COMPOUNDRULE n*1t 17 | COMPOUNDRULE n*mp 18 | WORDCHARS 0123456789 19 | 20 | PFX A Y 1 21 | PFX A 0 re . 22 | 23 | SFX V N 2 24 | SFX V e ive e 25 | SFX V 0 ive [^e] 26 | 27 | SFX N Y 3 28 | SFX N e ion e 29 | SFX N y ication y 30 | SFX N 0 en [^ey] 31 | 32 | SFX G Y 2 33 | SFX G e ing e 34 | SFX G 0 ing [^e] 35 | 36 | SFX D Y 4 37 | SFX D 0 d e 38 | SFX D y ied [^aeiou]y 39 | SFX D 0 ed [^ey] 40 | SFX D 0 ed [aeiou]y 41 | 42 | SFX T N 4 43 | SFX T 0 st e 44 | SFX T y iest [^aeiou]y 45 | SFX T 0 est [aeiou]y 46 | SFX T 0 est [^ey] 47 | 48 | SFX R Y 4 49 | SFX R 0 r e 50 | SFX R y ier [^aeiou]y 51 | SFX R 0 er [aeiou]y 52 | SFX R 0 er [^ey] 53 | 54 | SFX S Y 4 55 | SFX S y ies [^aeiou]y 56 | SFX S 0 s [aeiou]y 57 | SFX S 0 es [sxzh] 58 | SFX S 0 s [^sxzhy] 59 | 60 | SFX M Y 1 61 | SFX M 0 's . 
62 | 63 | REP 12 64 | REP a ei 65 | REP ei a 66 | REP a ey 67 | REP ey a 68 | REP ai ie 69 | REP ie ai 70 | REP alot a_lot 71 | REP are air 72 | REP are ear 73 | REP are eir 74 | REP air are 75 | REP air ere 76 | -------------------------------------------------------------------------------- /zspell/tests/files/w1_eng_short.dic: -------------------------------------------------------------------------------- 1 | 4 2 | banana/SM 3 | pine/AGDS 4 | pillow/GMDS 5 | reptile/SM 6 | rust/MDGS 7 | okay 8 | I 9 | misspelled 10 | alright 11 | something 12 | well 13 | this 14 | time 15 | -------------------------------------------------------------------------------- /zspell/tests/suite.rs: -------------------------------------------------------------------------------- 1 | // include tests generated by `build.rs`, one test per file in tests/suite 2 | include!(concat!(env!("OUT_DIR"), "/auto_suite.rs")); 3 | --------------------------------------------------------------------------------