├── .dockerignore ├── .github ├── FUNDING.yml └── workflows │ ├── build.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONFIGURATION.md ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── INNER_WORKINGS.md ├── LICENSE.md ├── PACKAGING.md ├── PROTOCOL.md ├── README.md ├── config.cfg ├── data └── store │ ├── fst │ └── .gitkeep │ └── kv │ └── .gitkeep ├── debian ├── changelog ├── compat ├── control ├── copyright ├── rules ├── sonic.install ├── sonic.postinst ├── sonic.service └── source │ └── format ├── scripts ├── build_packages.sh ├── release_binaries.sh └── sign_binaries.sh ├── src ├── channel │ ├── command.rs │ ├── format.rs │ ├── handle.rs │ ├── listen.rs │ ├── macros.rs │ ├── message.rs │ ├── mod.rs │ ├── mode.rs │ └── statistics.rs ├── config │ ├── defaults.rs │ ├── env_var.rs │ ├── logger.rs │ ├── mod.rs │ ├── options.rs │ └── reader.rs ├── executor │ ├── count.rs │ ├── flushb.rs │ ├── flushc.rs │ ├── flusho.rs │ ├── list.rs │ ├── macros.rs │ ├── mod.rs │ ├── pop.rs │ ├── push.rs │ ├── search.rs │ └── suggest.rs ├── lexer │ ├── mod.rs │ ├── ranges.rs │ ├── stopwords.rs │ └── token.rs ├── main.rs ├── query │ ├── actions.rs │ ├── builder.rs │ ├── mod.rs │ └── types.rs ├── stopwords │ ├── afr.rs │ ├── aka.rs │ ├── amh.rs │ ├── ara.rs │ ├── aze.rs │ ├── bel.rs │ ├── ben.rs │ ├── bul.rs │ ├── cat.rs │ ├── ces.rs │ ├── cmn.rs │ ├── dan.rs │ ├── deu.rs │ ├── ell.rs │ ├── eng.rs │ ├── epo.rs │ ├── est.rs │ ├── fin.rs │ ├── fra.rs │ ├── guj.rs │ ├── heb.rs │ ├── hin.rs │ ├── hrv.rs │ ├── hun.rs │ ├── hye.rs │ ├── ind.rs │ ├── ita.rs │ ├── jav.rs │ ├── jpn.rs │ ├── kan.rs │ ├── kat.rs │ ├── khm.rs │ ├── kor.rs │ ├── lat.rs │ ├── lav.rs │ ├── lit.rs │ ├── mal.rs │ ├── mar.rs │ ├── mkd.rs │ ├── mod.rs │ ├── mya.rs │ ├── nep.rs │ ├── nld.rs │ ├── nob.rs │ ├── ori.rs │ ├── pan.rs │ ├── pes.rs │ ├── pol.rs │ ├── por.rs │ ├── ron.rs │ ├── rus.rs │ ├── sin.rs │ ├── slk.rs │ ├── slv.rs │ ├── sna.rs │ ├── spa.rs │ ├── srp.rs │ ├── 
swe.rs │ ├── tam.rs │ ├── tel.rs │ ├── tgl.rs │ ├── tha.rs │ ├── tuk.rs │ ├── tur.rs │ ├── ukr.rs │ ├── urd.rs │ ├── uzb.rs │ ├── vie.rs │ ├── yid.rs │ └── zul.rs ├── store │ ├── fst.rs │ ├── generic.rs │ ├── identifiers.rs │ ├── item.rs │ ├── keyer.rs │ ├── kv.rs │ ├── macros.rs │ ├── mod.rs │ └── operation.rs └── tasker │ ├── mod.rs │ ├── runtime.rs │ └── shutdown.rs └── tests └── integration ├── .gitignore ├── instance └── config.cfg ├── runner ├── package-lock.json ├── package.json └── runner.js ├── scenarios ├── insert.js └── ping.js └── scripts └── run.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | tests/* 2 | target/* 3 | data/* 4 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: valeriansaliou 4 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | - "v*.*.*" 5 | 6 | name: Build and Release 7 | 8 | jobs: 9 | build-releases: 10 | runs-on: ubuntu-22.04 11 | 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v2 15 | 16 | - name: Cache build artifacts 17 | id: cache-cargo 18 | uses: actions/cache@v4 19 | with: 20 | path: | 21 | ~/.cargo/bin 22 | ~/.cargo/registry 23 | ~/.cargo/git 24 | target 25 | key: build-${{ runner.os }}-cargo-any 26 | 27 | - name: Install Rust toolchain 28 | uses: actions-rs/toolchain@v1 29 | with: 30 | toolchain: stable 31 | components: rustfmt 32 | override: true 33 | 34 | - name: Verify versions 35 | run: rustc --version && rustup --version && cargo --version 36 | 37 | - name: Get current tag 38 | id: current_tag 39 | uses: WyriHaximus/github-action-get-previous-tag@v1 40 | 41 | - name: Release 
package 42 | run: cargo publish --no-verify --token ${CRATES_TOKEN} 43 | env: 44 | CRATES_TOKEN: ${{ secrets.CRATES_TOKEN }} 45 | 46 | - name: Release binaries 47 | run: ./scripts/release_binaries.sh --version=${{ steps.current_tag.outputs.tag }} 48 | 49 | - name: Release new version 50 | uses: softprops/action-gh-release@v1 51 | with: 52 | tag_name: ${{ steps.current_tag.outputs.tag }} 53 | name: Sonic ${{ steps.current_tag.outputs.tag }} 54 | body: "⚠️ Changelog not yet provided." 55 | files: ./${{ steps.current_tag.outputs.tag }}-*.tar.gz 56 | env: 57 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 58 | 59 | build-packages: 60 | needs: build-releases 61 | runs-on: ubuntu-latest 62 | 63 | steps: 64 | - name: Checkout code 65 | uses: actions/checkout@v2 66 | 67 | - name: Build packages 68 | run: ./scripts/build_packages.sh 69 | 70 | - name: Push packages to Packagecloud 71 | uses: faucetsdn/action-packagecloud-upload-debian-packages@v1 72 | with: 73 | path: ./packages 74 | repo: ${{ secrets.PACKAGECLOUD_REPO }} 75 | token: ${{ secrets.PACKAGECLOUD_TOKEN }} 76 | 77 | build-docker: 78 | runs-on: ubuntu-latest 79 | 80 | steps: 81 | - name: Checkout code 82 | uses: actions/checkout@v4 83 | 84 | - name: Acquire Docker image metadata 85 | id: metadata 86 | uses: docker/metadata-action@v4 87 | with: 88 | images: valeriansaliou/sonic 89 | 90 | - name: Login to Docker Hub 91 | uses: docker/login-action@v3 92 | with: 93 | username: ${{ secrets.DOCKERHUB_USERNAME }} 94 | password: ${{ secrets.DOCKERHUB_TOKEN }} 95 | 96 | - name: Build and push Docker image 97 | uses: docker/build-push-action@v4 98 | id: build 99 | with: 100 | context: . 
101 | tags: ${{ steps.metadata.outputs.tags }} 102 | labels: ${{ steps.metadata.outputs.labels }} 103 | push: true 104 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: Test and Build 4 | 5 | jobs: 6 | test: 7 | strategy: 8 | matrix: 9 | os: [ubuntu-latest] 10 | rust-toolchain: [stable] 11 | fail-fast: false 12 | 13 | runs-on: ${{ matrix.os }} 14 | 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v2 18 | 19 | - name: Cache build artifacts 20 | id: cache-cargo 21 | uses: actions/cache@v4 22 | with: 23 | path: | 24 | ~/.cargo/registry 25 | ~/.cargo/git 26 | target 27 | key: test-${{ runner.os }}-cargo-${{ matrix.rust-toolchain }} 28 | 29 | - name: Cache integration artifacts 30 | id: cache-integration 31 | uses: actions/cache@v4 32 | with: 33 | path: | 34 | tests/integration/runner/node_modules 35 | key: test-${{ runner.os }}-integration-${{ matrix.rust-toolchain }} 36 | 37 | - name: Install Rust toolchain 38 | uses: actions-rs/toolchain@v1 39 | with: 40 | toolchain: ${{ matrix.rust-toolchain }} 41 | components: rustfmt 42 | override: true 43 | 44 | - name: Install NodeJS 45 | uses: actions/setup-node@v1 46 | 47 | - name: Verify versions 48 | run: rustc --version && rustup --version && cargo --version && node --version && npm --version 49 | 50 | - name: Build code 51 | run: cargo build 52 | 53 | - name: Test code 54 | run: cargo test 55 | 56 | - name: Check code style 57 | run: cargo fmt -- --check 58 | 59 | - name: Run integration tests 60 | run: tests/integration/scripts/run.sh 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/* 2 | .DS_Store 3 | *~ 4 | *# 5 | .cargo 6 | 7 | data/store/fst/* 8 | data/store/kv/* 9 | 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 
39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at valerian@valeriansaliou.name. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONFIGURATION.md: -------------------------------------------------------------------------------- 1 | Sonic Configuration 2 | =================== 3 | 4 | # File: config.cfg 5 | 6 | **All available configuration options are commented below, with allowed values:** 7 | 8 | **[server]** 9 | 10 | * `log_level` (type: _string_, allowed: `debug`, `info`, `warn`, `error`, default: `error`) — Verbosity of logging, set it to `error` in production 11 | 12 | **[channel]** 13 | 14 | * `inet` (type: _string_, allowed: IPv4 / IPv6 + port, default: `[::1]:1491`) — Host and TCP port Sonic Channel should listen on 15 | * `tcp_timeout` (type: _integer_, allowed: seconds, default: `300`) — Timeout of idle/dead client connections to Sonic Channel 16 | * `auth_password` (type: _string_, allowed: password values, default: none) — Authentication password required to connect to the channel (optional but recommended) 17 | 18 | **[channel.search]** 19 | 20 | * `query_limit_default` (type: _integer_, allowed: numbers, default: `10`) — Default search results limit for a query command (if the LIMIT command modifier is not used when issuing a QUERY command) 21 | * `query_limit_maximum` (type: _integer_, allowed: numbers, default: `100`) — Maximum search results limit for a query command (if the LIMIT command modifier is being used when issuing a QUERY command) 22 | * `query_alternates_try` (type: _integer_, allowed: numbers, default: `4`) — Number of alternate words that look like query word to try if there are not enough 
query results (if zero, no alternate will be tried; if too high there may be a noticeable performance penalty) 23 | * `suggest_limit_default` (type: _integer_, allowed: numbers, default: `5`) — Default suggested words limit for a suggest command (if the LIMIT command modifier is not used when issuing a SUGGEST command) 24 | * `suggest_limit_maximum` (type: _integer_, allowed: numbers, default: `20`) — Maximum suggested words limit for a suggest command (if the LIMIT command modifier is being used when issuing a SUGGEST command) 25 | * `list_limit_default` (type: _integer_, allowed: numbers, default: `100`) — Default listed words limit for a list command (if the LIMIT command modifier is not used when issuing a LIST command) 26 | * `list_limit_maximum` (type: _integer_, allowed: numbers, default: `500`) — Maximum listed words limit for a list command (if the LIMIT command modifier is being used when issuing a LIST command) 27 | 28 | **[store]** 29 | 30 | **[store.kv]** 31 | 32 | * `path` (type: _string_, allowed: UNIX path, default: `./data/store/kv/`) — Path to the Key-Value database store 33 | * `retain_word_objects` (type: _integer_, allowed: numbers, default: `1000`) — Maximum number of objects a given word in the index can be linked to (older objects are cleared using a sliding window) 34 | 35 | **[store.kv.pool]** 36 | 37 | * `inactive_after` (type: _integer_, allowed: seconds, default: `1800`) — Time after which a cached database is considered inactive and can be closed (if it is not used, ie. 
re-activated) 38 | 39 | **[store.kv.database]** 40 | 41 | * `flush_after` (type: _integer_, allowed: seconds, default: `900`) — Time after which pending database updates should be flushed from memory to disk (increase this delay if you encounter high-CPU usage issues when a flush task kicks-in; this value should be lower than `store.kv.pool.inactive_after`) 42 | * `compress` (type: _boolean_, allowed: `true`, `false`, default: `true`) — Whether to compress database or not (uses Zstandard) 43 | * `parallelism` (type: _integer_, allowed: numbers, default: `2`) — Limit on the number of compaction and flush threads that can run at the same time 44 | * `max_files` (type: _integer_, allowed: numbers, no default) — Maximum number of database files kept open at the same time per-database (if any; otherwise there are no limits) 45 | * `max_compactions` (type: _integer_, allowed: numbers, default: `1`) — Limit on the number of concurrent database compaction jobs 46 | * `max_flushes` (type: _integer_, allowed: numbers, default: `1`) — Limit on the number of concurrent database flush jobs 47 | * `write_buffer` (type: _integer_, allowed: numbers, default: `16384`) — Maximum size in KB of the database write buffer, after which data gets flushed to disk (ie. `16384` is `16MB`; the size should be a multiple of `1024`, eg. `128 * 1024 = 131072` for `128MB`) 48 | * `write_ahead_log` (type: _boolean_, allowed: `true`, `false`, default: `true`) — Whether to enable Write-Ahead Log or not (it avoids losing non-flushed data in case of server crash) 49 | 50 | **[store.fst]** 51 | 52 | * `path` (type: _string_, allowed: UNIX path, default: `./data/store/fst/`) — Path to the Finite-State Transducer database store 53 | 54 | **[store.fst.pool]** 55 | 56 | * `inactive_after` (type: _integer_, allowed: seconds, default: `300`) — Time after which a cached graph is considered inactive and can be closed (if it is not used, ie. 
re-activated) 57 | 58 | **[store.fst.graph]** 59 | 60 | * `consolidate_after` (type: _integer_, allowed: seconds, default: `180`) — Time after which a graph that has pending updates should be consolidated (increase this delay if you encounter high-CPU usage issues when a consolidation task kicks-in; this value should be lower than `store.fst.pool.inactive_after`) 61 | * `max_size` (type: _integer_, allowed: numbers, default: `2048`) — Maximum size in KB of the graph file on disk, after which further words are not inserted anymore (ie. `2048` is `2MB`; the size should be a multiple of `1024`, eg. `8 * 1024 = 8192` for `8MB`; use this limit to prevent heavy graphs from consolidating forever; this limit is enforced together with `store.fst.graph.max_words`, whichever is reached first) 62 | * `max_words` (type: _integer_, allowed: numbers, default: `250000`) — Maximum number of words that can be held at the same time in the graph, after which further words are not inserted anymore (use this limit to prevent heavy graphs from consolidating forever; this limit is enforced together with `store.fst.graph.max_size`, whichever is reached first) 63 | 64 | # Command-Line: Environment variables 65 | 66 | You are allowed to use environment variables in the configuration file. 
67 | 68 | **You can provide them as follows:** 69 | 70 | ```toml 71 | [channel] 72 | 73 | auth_password = "${env.SECRET}" 74 | ``` 75 | 76 | **Then, you can run Sonic providing a defined environment variable:** 77 | 78 | ```bash 79 | SECRET=secretphrase ./sonic -c /path/to/config.cfg 80 | ``` 81 | 82 | _Note that this can only be used with string-like values._ 83 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Sonic Contributing Guide 2 | ======================== 3 | 4 | # Get Started 5 | 6 | - First of all, fork and clone this repo; 7 | - Install Rust and Cargo (to build and test Sonic); 8 | - Install NPM (for integration tests); 9 | 10 | ## Build Sonic 11 | 12 | From the repository root, run: 13 | 14 | ```sh 15 | cargo build 16 | ``` 17 | 18 | ## Start Sonic 19 | 20 | From the repository root, run: 21 | 22 | ```sh 23 | cargo run 24 | ``` 25 | 26 | ## Run unit tests 27 | 28 | From the repository root, run: 29 | 30 | ```sh 31 | cargo test 32 | ``` 33 | 34 | ## Run integration tests 35 | 36 | From the directory: `/tests/integration/scripts/`, run: 37 | 38 | ```sh 39 | ./run.sh 40 | ``` 41 | 42 | # Report Issues & Request Features 43 | 44 | **If you encounter an issue with Sonic, or would like to request a feature to be implemented, please do [open an issue](https://github.com/valeriansaliou/sonic/issues/new).** 45 | 46 | Note that before opening an issue, you should always search for other similar issues so as to avoid opening a duplicate issue. This makes the life of the project maintainer much easier. 47 | 48 | When writing your issue title and comment, make sure to be as precise as possible, giving as many details as possible (even if you have a feeling some details are useless, they might make debugging or understanding easier for us). 
49 | 50 | # Submit Your Code 51 | 52 | **If you would like to contribute directly by writing code, you should fork this repository and edit it right away from your GitHub namespace.** 53 | 54 | Once you are done with your work, always ensure to format your Rust code according to guidelines, via the [rustfmt](https://github.com/rust-lang/rustfmt) utility: `rustfmt src/*.rs` 55 | 56 | When this is done, you may open a Pull Request (PR), then explain your changes and their purpose precisely. We will finally accept or comment on your Pull Request, if we need more changes done on your code. 57 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sonic-server" 3 | version = "1.4.9" 4 | description = "Fast, lightweight and schema-less search backend." 5 | readme = "README.md" 6 | license = "MPL-2.0" 7 | edition = "2018" 8 | homepage = "https://github.com/valeriansaliou/sonic" 9 | repository = "https://github.com/valeriansaliou/sonic.git" 10 | keywords = ["search", "query", "server", "index"] 11 | categories = ["database-implementations", "web-programming"] 12 | authors = ["Valerian Saliou ", "Baptiste Jamin "] 13 | 14 | [[bin]] 15 | name = "sonic" 16 | path = "src/main.rs" 17 | doc = false 18 | 19 | [dependencies] 20 | log = "0.4" 21 | toml = "0.8" 22 | clap = { version = "3.2", features = ["std", "cargo"] } 23 | lazy_static = "1.4" 24 | serde = "1.0" 25 | serde_derive = "1.0" 26 | rand = "0.8" 27 | unicode-segmentation = "1.6" 28 | radix = "0.6" 29 | rocksdb = { version = "0.22", features = ["zstd"] } 30 | fst = "0.3" 31 | fst-levenshtein = "0.3" 32 | fst-regex = "0.3" 33 | regex-syntax = "0.8" 34 | twox-hash = "1.5" 35 | byteorder = "1.4" 36 | hashbrown = "0.14" 37 | linked_hash_set = "0.1" 38 | whatlang = "0.16" 39 | regex = "1.6" 40 | jieba-rs = { version = "0.7", optional = true } 41 | lindera-core = { version = "0.31", 
optional = true } 42 | lindera-dictionary = { version = "0.31", features = ["unidic"], optional = true } 43 | lindera-tokenizer = { version = "0.31", features = ["unidic"], optional = true } 44 | 45 | [target.'cfg(unix)'.dependencies] 46 | nix = "0.18" 47 | tikv-jemallocator = { version = "0.4", optional = true } 48 | 49 | [target.'cfg(windows)'.dependencies] 50 | winapi = { version = "0.3", features = ["minwindef", "consoleapi"] } 51 | 52 | [features] 53 | default = ["allocator-jemalloc", "tokenizer-chinese"] 54 | allocator-jemalloc = ["tikv-jemallocator"] 55 | tokenizer-chinese = ["jieba-rs"] 56 | tokenizer-japanese = ["lindera-core", "lindera-dictionary", "lindera-tokenizer"] 57 | benchmark = [] 58 | 59 | [profile.dev] 60 | opt-level = 0 61 | debug = true 62 | debug-assertions = true 63 | 64 | [profile.release] 65 | opt-level = 3 66 | lto = true 67 | debug = false 68 | debug-assertions = false 69 | strip = true 70 | 71 | [profile.bench] 72 | opt-level = 3 73 | debug = false 74 | debug-assertions = false 75 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:slim-bullseye AS build 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y build-essential clang 5 | 6 | RUN rustup --version 7 | RUN rustup component add rustfmt 8 | 9 | RUN rustc --version && \ 10 | rustup --version && \ 11 | cargo --version 12 | 13 | WORKDIR /app 14 | COPY . 
/app 15 | 16 | RUN cargo clean && cargo build --release 17 | RUN strip ./target/release/sonic 18 | 19 | FROM gcr.io/distroless/cc 20 | 21 | WORKDIR /usr/src/sonic 22 | 23 | COPY --from=build /app/target/release/sonic /usr/local/bin/sonic 24 | 25 | CMD [ "sonic", "-c", "/etc/sonic.cfg" ] 26 | 27 | EXPOSE 1491 28 | -------------------------------------------------------------------------------- /PACKAGING.md: -------------------------------------------------------------------------------- 1 | Packaging 2 | ========= 3 | 4 | This file contains quick reminders and notes on how to package Sonic. 5 | 6 | We consider here the packaging flow of Sonic version `1.0.0` for Linux. 7 | 8 | 1. **How to bump Sonic version before a release:** 9 | 1. Bump version in `Cargo.toml` to `1.0.0` 10 | 2. Execute `cargo update` to bump `Cargo.lock` 11 | 3. Bump Debian package version in `debian/rules` to `1.0.0` 12 | 13 | 2. **How to build Sonic, package it and release it on Crates, GitHub, Docker Hub and Packagecloud (multiple architectures):** 14 | 1. Tag the latest Git commit corresponding to the release with tag `v1.0.0`, and push the tag 15 | 2. Wait for all release jobs to complete on the [actions](https://github.com/valeriansaliou/sonic/actions) page on GitHub 16 | 3. Download all release archives, and sign them locally using: `./scripts/sign_binaries.sh --version=1.0.0` 17 | 4. 
Publish a changelog and upload all the built archives, as well as their signatures on the [releases](https://github.com/valeriansaliou/sonic/releases) page on GitHub 18 | -------------------------------------------------------------------------------- /config.cfg: -------------------------------------------------------------------------------- 1 | # Sonic 2 | # Fast, lightweight and schema-less search backend 3 | # Configuration file 4 | # Example: https://github.com/valeriansaliou/sonic/blob/master/config.cfg 5 | 6 | 7 | [server] 8 | 9 | log_level = "debug" 10 | 11 | 12 | [channel] 13 | 14 | inet = "[::1]:1491" 15 | tcp_timeout = 300 16 | 17 | auth_password = "SecretPassword" 18 | 19 | [channel.search] 20 | 21 | query_limit_default = 10 22 | query_limit_maximum = 100 23 | query_alternates_try = 4 24 | 25 | suggest_limit_default = 5 26 | suggest_limit_maximum = 20 27 | 28 | list_limit_default = 100 29 | list_limit_maximum = 500 30 | 31 | 32 | [store] 33 | 34 | [store.kv] 35 | 36 | path = "./data/store/kv/" 37 | 38 | retain_word_objects = 1000 39 | 40 | [store.kv.pool] 41 | 42 | inactive_after = 1800 43 | 44 | [store.kv.database] 45 | 46 | flush_after = 900 47 | 48 | compress = true 49 | parallelism = 2 50 | max_files = 100 51 | max_compactions = 1 52 | max_flushes = 1 53 | write_buffer = 16384 54 | write_ahead_log = true 55 | 56 | [store.fst] 57 | 58 | path = "./data/store/fst/" 59 | 60 | [store.fst.pool] 61 | 62 | inactive_after = 300 63 | 64 | [store.fst.graph] 65 | 66 | consolidate_after = 180 67 | 68 | max_size = 2048 69 | max_words = 250000 70 | -------------------------------------------------------------------------------- /data/store/fst/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valeriansaliou/sonic/722bfece335c8e9cc926684c85d92c52368f5b1f/data/store/fst/.gitkeep -------------------------------------------------------------------------------- /data/store/kv/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/valeriansaliou/sonic/722bfece335c8e9cc926684c85d92c52368f5b1f/data/store/kv/.gitkeep -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | sonic (0.0.0-1) UNRELEASED; urgency=medium 2 | 3 | * Initial release. 4 | 5 | -- Valerian Saliou Tue, 31 Aug 2023 12:00:00 +0000 6 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 10 2 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: sonic 2 | Section: net 3 | Priority: ext 4 | Maintainer: Valerian Saliou 5 | Standards-Version: 3.9.4 6 | Build-Depends: wget, ca-certificates 7 | Homepage: https://github.com/valeriansaliou/sonic 8 | 9 | Package: sonic 10 | Architecture: any 11 | Depends: adduser 12 | Provides: sonic 13 | Description: Fast, lightweight & schema-less search backend. An alternative to Elasticsearch that runs on a few MBs of RAM. 14 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: sonic 3 | Upstream-Contact: Valerian Saliou 4 | Source: https://github.com/valeriansaliou/sonic 5 | 6 | Files: * 7 | Copyright: 2023 Valerian Saliou 8 | License: MPL-2 9 | 10 | License: MPL-2 11 | This Source Code Form is subject to the terms of the Mozilla Public License, 12 | v. 2.0. If a copy of the MPL was not distributed with this file, 13 | You can obtain one at http://mozilla.org/MPL/2.0/. 
14 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | DISTRIBUTION = $(shell lsb_release -sr) 4 | VERSION = 1.4.9 5 | PACKAGEVERSION = $(VERSION)-0~$(DISTRIBUTION)0 6 | URL = https://github.com/valeriansaliou/sonic/releases/download/v$(VERSION)/ 7 | 8 | %: 9 | dh $@ --with systemd 10 | 11 | override_dh_auto_clean: 12 | override_dh_auto_test: 13 | override_dh_auto_build: 14 | override_dh_auto_install: 15 | $(eval ENV_ARCH := $(shell dpkg --print-architecture)) 16 | $(eval ENV_ISA := $(shell if [ "$(ENV_ARCH)" = "amd64" ]; then echo "x86_64"; else echo "$(ENV_ARCH)"; fi)) 17 | $(eval ENV_TARBALL := v$(VERSION)-$(ENV_ISA)-gnu.tar.gz) 18 | 19 | echo "Architecture: $(ENV_ARCH)" 20 | echo "Instruction Set: $(ENV_ISA)" 21 | echo "Target: $(URL)$(ENV_TARBALL)" 22 | 23 | wget -N --progress=dot:mega $(URL)$(ENV_TARBALL) 24 | tar -xf $(ENV_TARBALL) 25 | strip sonic/sonic 26 | mv sonic/config.cfg sonic/sonic.cfg 27 | mkdir sonic/store/ 28 | sed -i 's/path = ".\/data\/store\//path = "\/var\/lib\/sonic\/store\//g' sonic/sonic.cfg 29 | 30 | override_dh_gencontrol: 31 | dh_gencontrol -- -v$(PACKAGEVERSION) 32 | -------------------------------------------------------------------------------- /debian/sonic.install: -------------------------------------------------------------------------------- 1 | sonic/sonic usr/bin/ 2 | sonic/sonic.cfg etc/ 3 | sonic/store/ var/lib/sonic/ 4 | -------------------------------------------------------------------------------- /debian/sonic.postinst: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | case "$1" in 6 | configure) 7 | adduser --system --disabled-password --disabled-login --home /var/empty \ 8 | --no-create-home --quiet --group sonic && \ 9 | chown sonic:sonic -R /var/lib/sonic/ 10 | ;; 11 | esac 12 | 13 | 
#DEBHELPER# 14 | 15 | exit 0 16 | -------------------------------------------------------------------------------- /debian/sonic.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Sonic Search Index 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | User=sonic 8 | Group=sonic 9 | ExecStart=/usr/bin/sonic -c /etc/sonic.cfg 10 | Restart=on-failure 11 | LimitNOFILE=infinity 12 | 13 | [Install] 14 | WantedBy=multi-user.target 15 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /scripts/build_packages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## 4 | # Sonic 5 | # 6 | # Fast, lightweight and schema-less search backend 7 | # Copyright: 2023, Valerian Saliou 8 | # License: Mozilla Public License v2.0 (MPL v2.0) 9 | ## 10 | 11 | # Define build pipeline 12 | function build_for_target { 13 | OS="$2" DIST="$3" ARCH="$1" ./packpack/packpack 14 | release_result=$? 15 | 16 | if [ $release_result -eq 0 ]; then 17 | mkdir -p "./packages/$2_$3/" 18 | mv ./build/*$4 "./packages/$2_$3/" 19 | 20 | echo "Result: Packaged architecture: $1 for OS: $2:$3 (*$4)" 21 | fi 22 | 23 | return $release_result 24 | } 25 | 26 | # Run release tasks 27 | ABSPATH=$(cd "$(dirname "$0")"; pwd) 28 | BASE_DIR="$ABSPATH/../" 29 | 30 | rc=0 31 | 32 | pushd "$BASE_DIR" > /dev/null 33 | echo "Executing packages build steps for Sonic..." 34 | 35 | # Initialize `packpack` 36 | rm -rf ./packpack && \ 37 | git clone https://github.com/packpack/packpack.git packpack 38 | rc=$? 39 | 40 | # Proceed build for each target? 41 | if [ $rc -eq 0 ]; then 42 | build_for_target "x86_64" "debian" "bookworm" ".deb" 43 | rc=$? 
#!/bin/bash

##
#  Sonic
#
#  Fast, lightweight and schema-less search backend
#  Copyright: 2023, Valerian Saliou
#  License: Mozilla Public License v2.0 (MPL v2.0)
##

# Read arguments
# Notice: arguments are expected in the 'key=value' form (eg. '--version=1.4.9')
while [ "$1" != "" ]; do
  # Split the argument on its first '=' (pure shell, no sub-process, no \
  #   word-splitting of the raw argument)
  argument_key="${1%%=*}"

  case "$1" in
    *=*) argument_value="${1#*=}" ;;
    *) argument_value="" ;;
  esac

  case $argument_key in
    -v | --version)
      # Notice: strip the leading 'v' (if any) from the version number. Only \
      #   a prefix is removed, so that versions such as '1.0.0-dev' are kept \
      #   intact.
      SONIC_VERSION="${argument_value#v}"
      ;;
    *)
      echo "Unknown argument received: '$argument_key'"
      exit 1
      ;;
  esac

  shift
done

# Ensure release version is provided
if [ -z "$SONIC_VERSION" ]; then
  echo "No Sonic release version was provided, please provide it using '--version'"

  exit 1
fi

# Define release pipeline
# Arguments: $1 = architecture label, $2 = libc label, $3 = cargo target triple
# Returns: the exit status of the build-and-pack command chain
function release_for_architecture {
  final_tar="v$SONIC_VERSION-$1-$2.tar.gz"

  rm -rf ./sonic/ && \
    cargo build --target "$3" --release && \
    mkdir ./sonic && \
    cp -p "target/$3/release/sonic" ./sonic/ && \
    cp -r ./config.cfg sonic/ && \
    tar --owner=0 --group=0 -czvf "$final_tar" ./sonic && \
    rm -r ./sonic/
  release_result=$?

  if [ $release_result -eq 0 ]; then
    echo "Result: Packed architecture: $1 ($2) to file: $final_tar"
  fi

  return $release_result
}

# Run release tasks
ABSPATH=$(cd "$(dirname "$0")"; pwd)
BASE_DIR="$ABSPATH/../"

rc=0

# Notice: abort if the base directory cannot be entered, as the release \
#   pipeline removes './sonic/' relative to the current directory.
pushd "$BASE_DIR" > /dev/null || exit 1
  echo "Executing release steps for Sonic v$SONIC_VERSION..."

  release_for_architecture "x86_64" "gnu" "x86_64-unknown-linux-gnu"
  rc=$?

  if [ $rc -eq 0 ]; then
    echo "Success: Done executing release steps for Sonic v$SONIC_VERSION"
  else
    echo "Error: Failed executing release steps for Sonic v$SONIC_VERSION"
  fi
popd > /dev/null

exit $rc
/// Unescapes a command text payload.
///
/// Recognized escape sequences:
/// - `\n` becomes a line feed;
/// - `\"` becomes a double quote;
/// - `\\` becomes a single backslash.
///
/// Any other escaped character is kept verbatim together with its backslash
/// (eg. `\t` stays `\t`), and a trailing lone backslash is kept as-is, so that
/// no input character is ever silently dropped.
pub fn unescape(text: &str) -> String {
    // Pre-reserve a byte-aware required capacity as to avoid heap resizes (the \
    //   unescaped output is never longer than the input)
    let mut unescaped = String::with_capacity(text.len());
    let mut characters = text.chars();

    while let Some(character) = characters.next() {
        if character != '\\' {
            unescaped.push(character);

            continue;
        }

        // Found escaped character
        match characters.next() {
            Some('n') => unescaped.push('\n'),
            Some('"') => unescaped.push('"'),
            // Escaped backslash, or lone backslash at the end of the text
            Some('\\') | None => unescaped.push('\\'),
            // Unknown escape: keep both characters rather than losing one
            Some(other) => {
                unescaped.push('\\');
                unescaped.push(other);
            }
        }
    }

    unescaped
}
this"#.to_string() 39 | ); 40 | assert_eq!( 41 | unescape(r#"look at \\\\"\\\" me i'm \\"\"trying to hack you\""#), 42 | r#"look at \\"\" me i'm \""trying to hack you""#.to_string() 43 | ); 44 | } 45 | } 46 | 47 | #[cfg(all(feature = "benchmark", test))] 48 | mod benches { 49 | extern crate test; 50 | 51 | use super::*; 52 | use test::Bencher; 53 | 54 | #[bench] 55 | fn bench_unescape_command_text(b: &mut Bencher) { 56 | b.iter(|| unescape(r#"i'm \\"\"trying to hack you\""#)); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/channel/listen.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use std::net::TcpListener; 8 | use std::process; 9 | use std::sync::RwLock; 10 | use std::thread; 11 | 12 | use super::handle::ChannelHandle; 13 | use crate::{APP_CONF, THREAD_NAME_CHANNEL_CLIENT}; 14 | 15 | pub struct ChannelListenBuilder; 16 | pub struct ChannelListen; 17 | 18 | lazy_static! 
{ 19 | pub static ref CHANNEL_AVAILABLE: RwLock = RwLock::new(true); 20 | } 21 | 22 | impl ChannelListenBuilder { 23 | pub fn build() -> ChannelListen { 24 | ChannelListen {} 25 | } 26 | } 27 | 28 | impl ChannelListen { 29 | pub fn run(&self) { 30 | match TcpListener::bind(APP_CONF.channel.inet) { 31 | Ok(listener) => { 32 | info!("listening on tcp://{}", APP_CONF.channel.inet); 33 | 34 | for stream in listener.incoming() { 35 | match stream { 36 | Ok(stream) => { 37 | thread::Builder::new() 38 | .name(THREAD_NAME_CHANNEL_CLIENT.to_string()) 39 | .spawn(move || { 40 | if let Ok(peer_addr) = stream.peer_addr() { 41 | debug!("channel client connecting: {}", peer_addr); 42 | } 43 | 44 | // Create client 45 | ChannelHandle::client(stream); 46 | }) 47 | .ok(); 48 | } 49 | Err(err) => { 50 | warn!("error handling stream: {}", err); 51 | } 52 | } 53 | } 54 | } 55 | Err(err) => { 56 | error!("error binding channel listener: {}", err); 57 | 58 | // Exit Sonic 59 | process::exit(1); 60 | } 61 | } 62 | } 63 | 64 | pub fn teardown() { 65 | // Channel cannot be used anymore 66 | *CHANNEL_AVAILABLE.write().unwrap() = false; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/channel/macros.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | #[macro_export] 8 | macro_rules! 
/// Channel modes, identified on the wire by a lowercase name.
pub enum ChannelMode {
    Search,
    Ingest,
    Control,
}

impl ChannelMode {
    /// Parses a mode from its lowercase name (`search`, `ingest` or
    /// `control`). The match is exact and case-sensitive; any other value
    /// yields `Err(())`.
    pub fn from_str(value: &str) -> Result<Self, ()> {
        match value {
            "search" => Ok(ChannelMode::Search),
            "ingest" => Ok(ChannelMode::Ingest),
            "control" => Ok(ChannelMode::Control),
            _ => Err(()),
        }
    }

    /// Returns the lowercase name of the mode (the inverse of `from_str`).
    pub fn to_str(&self) -> &'static str {
        match *self {
            ChannelMode::Search => "search",
            ChannelMode::Ingest => "ingest",
            ChannelMode::Control => "control",
        }
    }
}
*CLIENTS_CONNECTED.read().unwrap(), 52 | commands_total: *COMMANDS_TOTAL.read().unwrap(), 53 | command_latency_best: *COMMAND_LATENCY_BEST.read().unwrap(), 54 | command_latency_worst: *COMMAND_LATENCY_WORST.read().unwrap(), 55 | kv_open_count: kv_count, 56 | fst_open_count: fst_count.0, 57 | fst_consolidate_count: fst_count.1, 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/config/defaults.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use std::net::SocketAddr; 8 | use std::path::PathBuf; 9 | 10 | pub fn server_log_level() -> String { 11 | "error".to_string() 12 | } 13 | 14 | pub fn channel_inet() -> SocketAddr { 15 | "[::1]:1491".parse().unwrap() 16 | } 17 | 18 | pub fn channel_tcp_timeout() -> u64 { 19 | 300 20 | } 21 | 22 | pub fn channel_search_query_limit_default() -> u16 { 23 | 10 24 | } 25 | 26 | pub fn channel_search_query_limit_maximum() -> u16 { 27 | 100 28 | } 29 | 30 | pub fn channel_search_query_alternates_try() -> usize { 31 | 4 32 | } 33 | 34 | pub fn channel_search_suggest_limit_default() -> u16 { 35 | 5 36 | } 37 | 38 | pub fn channel_search_suggest_limit_maximum() -> u16 { 39 | 20 40 | } 41 | 42 | pub fn channel_search_list_limit_default() -> u16 { 43 | 100 44 | } 45 | 46 | pub fn channel_search_list_limit_maximum() -> u16 { 47 | 500 48 | } 49 | 50 | pub fn store_kv_path() -> PathBuf { 51 | PathBuf::from("./data/store/kv/") 52 | } 53 | 54 | pub fn store_kv_retain_word_objects() -> usize { 55 | 1000 56 | } 57 | 58 | pub fn store_kv_pool_inactive_after() -> u64 { 59 | 1800 60 | } 61 | 62 | pub fn store_kv_database_flush_after() -> u64 { 63 | 900 64 | } 65 | 66 | pub fn store_kv_database_compress() -> bool { 67 | true 68 | } 69 | 70 | pub fn store_kv_database_parallelism() 
-> u16 { 71 | 2 72 | } 73 | 74 | pub fn store_kv_database_max_compactions() -> u16 { 75 | 1 76 | } 77 | 78 | pub fn store_kv_database_max_flushes() -> u16 { 79 | 1 80 | } 81 | 82 | pub fn store_kv_database_write_buffer() -> usize { 83 | 16384 84 | } 85 | 86 | pub fn store_kv_database_write_ahead_log() -> bool { 87 | true 88 | } 89 | 90 | pub fn store_fst_path() -> PathBuf { 91 | PathBuf::from("./data/store/fst/") 92 | } 93 | 94 | pub fn store_fst_pool_inactive_after() -> u64 { 95 | 300 96 | } 97 | 98 | pub fn store_fst_graph_consolidate_after() -> u64 { 99 | 180 100 | } 101 | 102 | pub fn store_fst_graph_max_size() -> usize { 103 | 2048 104 | } 105 | 106 | pub fn store_fst_graph_max_words() -> usize { 107 | 250000 108 | } 109 | -------------------------------------------------------------------------------- /src/config/env_var.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use regex::Regex; 8 | use serde::{Deserialize, Deserializer}; 9 | use std::net::SocketAddr; 10 | use std::path::PathBuf; 11 | 12 | #[derive(Deserialize, PartialEq)] 13 | struct WrappedString(String); 14 | 15 | pub fn str<'de, D>(deserializer: D) -> Result 16 | where 17 | D: Deserializer<'de>, 18 | { 19 | let value = String::deserialize(deserializer)?; 20 | 21 | match is_env_var(&value) { 22 | true => Ok(get_env_var(&value)), 23 | false => Ok(value), 24 | } 25 | } 26 | 27 | pub fn opt_str<'de, D>(deserializer: D) -> Result, D::Error> 28 | where 29 | D: Deserializer<'de>, 30 | { 31 | Option::::deserialize(deserializer).map(|option: Option| { 32 | option.map(|wrapped: WrappedString| { 33 | let value = wrapped.0; 34 | 35 | match is_env_var(&value) { 36 | true => get_env_var(&value), 37 | false => value, 38 | } 39 | }) 40 | }) 41 | } 42 | 43 | pub fn socket_addr<'de, D>(deserializer: D) 
-> Result 44 | where 45 | D: Deserializer<'de>, 46 | { 47 | let value = String::deserialize(deserializer)?; 48 | 49 | match is_env_var(&value) { 50 | true => Ok(get_env_var(&value).parse().unwrap()), 51 | false => Ok(value.parse().unwrap()), 52 | } 53 | } 54 | 55 | pub fn path_buf<'de, D>(deserializer: D) -> Result 56 | where 57 | D: Deserializer<'de>, 58 | { 59 | let value = String::deserialize(deserializer)?; 60 | 61 | match is_env_var(&value) { 62 | true => Ok(PathBuf::from(get_env_var(&value))), 63 | false => Ok(PathBuf::from(value)), 64 | } 65 | } 66 | 67 | fn is_env_var(value: &str) -> bool { 68 | Regex::new(r"^\$\{env\.\w+\}$") 69 | .expect("env_var: regex is invalid") 70 | .is_match(value) 71 | } 72 | 73 | fn get_env_var(wrapped_key: &str) -> String { 74 | let key: String = String::from(wrapped_key) 75 | .drain(6..(wrapped_key.len() - 1)) 76 | .collect(); 77 | 78 | std::env::var(key.clone()).unwrap_or_else(|_| panic!("env_var: variable '{}' is not set", key)) 79 | } 80 | 81 | #[cfg(test)] 82 | mod tests { 83 | use super::*; 84 | 85 | #[test] 86 | fn it_checks_environment_variable_patterns() { 87 | assert!(is_env_var("${env.XXX}")); 88 | assert!(!is_env_var("${env.XXX")); 89 | assert!(!is_env_var("${env.XXX}a")); 90 | assert!(!is_env_var("a${env.XXX}")); 91 | assert!(!is_env_var("{env.XXX}")); 92 | assert!(!is_env_var("$env.XXX}")); 93 | assert!(!is_env_var("${envXXX}")); 94 | assert!(!is_env_var("${.XXX}")); 95 | assert!(!is_env_var("${XXX}")); 96 | } 97 | 98 | #[test] 99 | fn it_gets_environment_variable() { 100 | std::env::set_var("TEST", "test"); 101 | 102 | assert_eq!(get_env_var("${env.TEST}"), "test"); 103 | 104 | std::env::remove_var("TEST"); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/config/logger.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, 
Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use log::{Level, LevelFilter, Metadata, Record, SetLoggerError}; 8 | 9 | pub struct ConfigLogger; 10 | 11 | impl log::Log for ConfigLogger { 12 | fn enabled(&self, metadata: &Metadata) -> bool { 13 | metadata.level() <= Level::Debug 14 | } 15 | 16 | fn log(&self, record: &Record) { 17 | if self.enabled(record.metadata()) { 18 | println!("({}) - {}", record.level(), record.args()); 19 | } 20 | } 21 | 22 | fn flush(&self) {} 23 | } 24 | 25 | impl ConfigLogger { 26 | pub fn init(level: LevelFilter) -> Result<(), SetLoggerError> { 27 | log::set_max_level(level); 28 | log::set_logger(&ConfigLogger) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/config/mod.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | mod defaults; 8 | mod env_var; 9 | 10 | pub mod logger; 11 | pub mod options; 12 | pub mod reader; 13 | -------------------------------------------------------------------------------- /src/config/options.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use std::net::SocketAddr; 8 | use std::path::PathBuf; 9 | 10 | use super::defaults; 11 | use super::env_var; 12 | 13 | #[derive(Deserialize)] 14 | pub struct Config { 15 | pub server: ConfigServer, 16 | pub channel: ConfigChannel, 17 | pub store: ConfigStore, 18 | } 19 | 20 | #[derive(Deserialize)] 21 | pub struct ConfigServer { 22 | #[serde( 23 | default = "defaults::server_log_level", 24 | deserialize_with = "env_var::str" 25 | )] 26 | pub log_level: String, 27 | 
} 28 | 29 | #[derive(Deserialize)] 30 | pub struct ConfigChannel { 31 | #[serde( 32 | default = "defaults::channel_inet", 33 | deserialize_with = "env_var::socket_addr" 34 | )] 35 | pub inet: SocketAddr, 36 | 37 | #[serde(default = "defaults::channel_tcp_timeout")] 38 | pub tcp_timeout: u64, 39 | 40 | #[serde(default, deserialize_with = "env_var::opt_str")] 41 | pub auth_password: Option, 42 | 43 | pub search: ConfigChannelSearch, 44 | } 45 | 46 | #[derive(Deserialize)] 47 | pub struct ConfigChannelSearch { 48 | #[serde(default = "defaults::channel_search_query_limit_default")] 49 | pub query_limit_default: u16, 50 | 51 | #[serde(default = "defaults::channel_search_query_limit_maximum")] 52 | pub query_limit_maximum: u16, 53 | 54 | #[serde(default = "defaults::channel_search_query_alternates_try")] 55 | pub query_alternates_try: usize, 56 | 57 | #[serde(default = "defaults::channel_search_suggest_limit_default")] 58 | pub suggest_limit_default: u16, 59 | 60 | #[serde(default = "defaults::channel_search_suggest_limit_maximum")] 61 | pub suggest_limit_maximum: u16, 62 | 63 | #[serde(default = "defaults::channel_search_list_limit_default")] 64 | pub list_limit_default: u16, 65 | 66 | #[serde(default = "defaults::channel_search_list_limit_maximum")] 67 | pub list_limit_maximum: u16, 68 | } 69 | 70 | #[derive(Deserialize)] 71 | pub struct ConfigStore { 72 | pub kv: ConfigStoreKV, 73 | pub fst: ConfigStoreFST, 74 | } 75 | 76 | #[derive(Deserialize)] 77 | pub struct ConfigStoreKV { 78 | #[serde( 79 | default = "defaults::store_kv_path", 80 | deserialize_with = "env_var::path_buf" 81 | )] 82 | pub path: PathBuf, 83 | 84 | #[serde(default = "defaults::store_kv_retain_word_objects")] 85 | pub retain_word_objects: usize, 86 | 87 | pub pool: ConfigStoreKVPool, 88 | pub database: ConfigStoreKVDatabase, 89 | } 90 | 91 | #[derive(Deserialize)] 92 | pub struct ConfigStoreKVPool { 93 | #[serde(default = "defaults::store_kv_pool_inactive_after")] 94 | pub inactive_after: u64, 95 | } 
96 | 97 | #[derive(Deserialize)] 98 | pub struct ConfigStoreKVDatabase { 99 | #[serde(default = "defaults::store_kv_database_flush_after")] 100 | pub flush_after: u64, 101 | 102 | #[serde(default = "defaults::store_kv_database_compress")] 103 | pub compress: bool, 104 | 105 | #[serde(default = "defaults::store_kv_database_parallelism")] 106 | pub parallelism: u16, 107 | 108 | pub max_files: Option, 109 | 110 | #[serde(default = "defaults::store_kv_database_max_compactions")] 111 | pub max_compactions: u16, 112 | 113 | #[serde(default = "defaults::store_kv_database_max_flushes")] 114 | pub max_flushes: u16, 115 | 116 | #[serde(default = "defaults::store_kv_database_write_buffer")] 117 | pub write_buffer: usize, 118 | 119 | #[serde(default = "defaults::store_kv_database_write_ahead_log")] 120 | pub write_ahead_log: bool, 121 | } 122 | 123 | #[derive(Deserialize)] 124 | pub struct ConfigStoreFST { 125 | #[serde( 126 | default = "defaults::store_fst_path", 127 | deserialize_with = "env_var::path_buf" 128 | )] 129 | pub path: PathBuf, 130 | 131 | pub pool: ConfigStoreFSTPool, 132 | pub graph: ConfigStoreFSTGraph, 133 | } 134 | 135 | #[derive(Deserialize)] 136 | pub struct ConfigStoreFSTPool { 137 | #[serde(default = "defaults::store_fst_pool_inactive_after")] 138 | pub inactive_after: u64, 139 | } 140 | 141 | #[derive(Deserialize)] 142 | pub struct ConfigStoreFSTGraph { 143 | #[serde(default = "defaults::store_fst_graph_consolidate_after")] 144 | pub consolidate_after: u64, 145 | 146 | #[serde(default = "defaults::store_fst_graph_max_size")] 147 | pub max_size: usize, 148 | 149 | #[serde(default = "defaults::store_fst_graph_max_words")] 150 | pub max_words: usize, 151 | } 152 | -------------------------------------------------------------------------------- /src/config/reader.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian 
Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use std::fs::File; 8 | use std::io::Read; 9 | 10 | use super::options::Config; 11 | use crate::APP_ARGS; 12 | 13 | pub struct ConfigReader; 14 | 15 | impl ConfigReader { 16 | pub fn make() -> Config { 17 | debug!("reading config file: {}", &APP_ARGS.config); 18 | 19 | let mut file = File::open(&APP_ARGS.config).expect("cannot find config file"); 20 | let mut conf = String::new(); 21 | 22 | file.read_to_string(&mut conf) 23 | .expect("cannot read config file"); 24 | 25 | debug!("read config file: {}", &APP_ARGS.config); 26 | 27 | // Parse configuration 28 | let config = toml::from_str(&conf).expect("syntax error in config file"); 29 | 30 | // Validate configuration 31 | Self::validate(&config); 32 | 33 | config 34 | } 35 | 36 | fn validate(config: &Config) { 37 | // Check 'write_buffer' for KV 38 | if config.store.kv.database.write_buffer == 0 { 39 | panic!("write_buffer for kv must not be zero"); 40 | } 41 | 42 | // Check 'flush_after' for KV 43 | if config.store.kv.database.flush_after >= config.store.kv.pool.inactive_after { 44 | panic!("flush_after for kv must be strictly lower than inactive_after"); 45 | } 46 | 47 | // Check 'consolidate_after' for FST 48 | if config.store.fst.graph.consolidate_after >= config.store.fst.pool.inactive_after { 49 | panic!("consolidate_after for fst must be strictly lower than inactive_after"); 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/executor/count.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use crate::store::fst::StoreFSTPool; 8 | use crate::store::fst::{StoreFSTActionBuilder, StoreFSTMisc}; 9 | use crate::store::item::StoreItem; 10 | use 
crate::store::kv::StoreKVActionBuilder; 11 | use crate::store::kv::{StoreKVAcquireMode, StoreKVPool}; 12 | 13 | pub struct ExecutorCount; 14 | 15 | impl ExecutorCount { 16 | pub fn execute(store: StoreItem) -> Result { 17 | match store { 18 | // Count terms in (collection, bucket, object) from KV 19 | StoreItem(collection, Some(bucket), Some(object)) => { 20 | // Important: acquire database access read lock, and reference it in context. This \ 21 | // prevents the database from being erased while using it in this block. 22 | general_kv_access_lock_read!(); 23 | 24 | if let Ok(kv_store) = StoreKVPool::acquire(StoreKVAcquireMode::OpenOnly, collection) 25 | { 26 | // Important: acquire bucket store read lock 27 | executor_kv_lock_read!(kv_store); 28 | 29 | let kv_action = StoreKVActionBuilder::access(bucket, kv_store); 30 | 31 | // Try to resolve existing OID to IID 32 | let oid = object.as_str(); 33 | 34 | kv_action 35 | .get_oid_to_iid(oid) 36 | .unwrap_or(None) 37 | .map(|iid| { 38 | // List terms for IID 39 | if let Some(terms) = kv_action.get_iid_to_terms(iid).unwrap_or(None) { 40 | terms.len() as u32 41 | } else { 42 | 0 43 | } 44 | }) 45 | .ok_or(()) 46 | .or(Ok(0)) 47 | } else { 48 | Err(()) 49 | } 50 | } 51 | // Count terms in (collection, bucket) from FST 52 | StoreItem(collection, Some(bucket), None) => { 53 | // Important: acquire graph access read lock, and reference it in context. This \ 54 | // prevents the graph from being erased while using it in this block. 
55 | general_fst_access_lock_read!(); 56 | 57 | if let Ok(fst_store) = StoreFSTPool::acquire(collection, bucket) { 58 | let fst_action = StoreFSTActionBuilder::access(fst_store); 59 | 60 | Ok(fst_action.count_words() as u32) 61 | } else { 62 | Err(()) 63 | } 64 | } 65 | // Count buckets in (collection) from FS 66 | StoreItem(collection, None, None) => { 67 | StoreFSTMisc::count_collection_buckets(collection).map(|count| count as u32) 68 | } 69 | _ => Err(()), 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/executor/flushb.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use crate::store::fst::StoreFSTActionBuilder; 8 | use crate::store::item::StoreItem; 9 | use crate::store::kv::{StoreKVAcquireMode, StoreKVActionBuilder, StoreKVPool}; 10 | 11 | pub struct ExecutorFlushB; 12 | 13 | impl ExecutorFlushB { 14 | pub fn execute(store: StoreItem) -> Result { 15 | if let StoreItem(collection, Some(bucket), None) = store { 16 | // Important: acquire database access read lock, and reference it in context. This \ 17 | // prevents the database from being erased while using it in this block. 18 | // Notice: acquire FST lock in write mode, as we will erase it. 19 | general_kv_access_lock_read!(); 20 | general_fst_access_lock_write!(); 21 | 22 | if let Ok(kv_store) = StoreKVPool::acquire(StoreKVAcquireMode::OpenOnly, collection) { 23 | // Important: acquire bucket store write lock 24 | executor_kv_lock_write!(kv_store); 25 | 26 | if kv_store.is_some() { 27 | // Store exists, proceed erasure. 
28 | debug!( 29 | "collection store exists, erasing: {} from {}", 30 | bucket.as_str(), 31 | collection.as_str() 32 | ); 33 | 34 | let kv_action = StoreKVActionBuilder::access(bucket, kv_store); 35 | 36 | // Notice: we cannot use the provided KV bucket erasure helper there, as \ 37 | // erasing a bucket requires a database lock, which would incur a dead-lock, \ 38 | // thus we need to perform the erasure from there. 39 | if let Ok(erase_count) = kv_action.batch_erase_bucket() { 40 | if StoreFSTActionBuilder::erase(collection, Some(bucket)).is_ok() { 41 | debug!("done with bucket erasure"); 42 | 43 | return Ok(erase_count); 44 | } 45 | } 46 | } else { 47 | // Store does not exist, consider as already erased. 48 | debug!( 49 | "collection store does not exist, consider {} from {} already erased", 50 | bucket.as_str(), 51 | collection.as_str() 52 | ); 53 | 54 | return Ok(0); 55 | } 56 | } 57 | } 58 | 59 | Err(()) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/executor/flushc.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use crate::store::fst::StoreFSTActionBuilder; 8 | use crate::store::item::StoreItem; 9 | use crate::store::kv::StoreKVActionBuilder; 10 | 11 | pub struct ExecutorFlushC; 12 | 13 | impl ExecutorFlushC { 14 | pub fn execute(store: StoreItem) -> Result { 15 | // Important: do not acquire the store from there, as otherwise it will remain open \ 16 | // even if dropped in the inner function, as this caller would still own a reference to \ 17 | // it. 18 | if let StoreItem(collection, None, None) = store { 19 | // Acquire KV + FST locks in write mode, as we will erase them, we need to prevent any \ 20 | // other consumer to use them. 
21 | general_kv_access_lock_write!(); 22 | general_fst_access_lock_write!(); 23 | 24 | match ( 25 | StoreKVActionBuilder::erase(collection, None), 26 | StoreFSTActionBuilder::erase(collection, None), 27 | ) { 28 | (Ok(erase_count), Ok(_)) => Ok(erase_count), 29 | _ => Err(()), 30 | } 31 | } else { 32 | Err(()) 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/executor/flusho.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use crate::store::item::StoreItem; 8 | use crate::store::kv::{StoreKVAcquireMode, StoreKVActionBuilder, StoreKVPool}; 9 | 10 | pub struct ExecutorFlushO; 11 | 12 | impl ExecutorFlushO { 13 | pub fn execute(store: StoreItem) -> Result { 14 | if let StoreItem(collection, Some(bucket), Some(object)) = store { 15 | // Important: acquire database access read lock, and reference it in context. This \ 16 | // prevents the database from being erased while using it in this block. 
17 | general_kv_access_lock_read!(); 18 | 19 | if let Ok(kv_store) = StoreKVPool::acquire(StoreKVAcquireMode::OpenOnly, collection) { 20 | // Important: acquire bucket store write lock 21 | executor_kv_lock_write!(kv_store); 22 | 23 | let kv_action = StoreKVActionBuilder::access(bucket, kv_store); 24 | 25 | // Try to resolve existing OID to IID (if it does not exist, there is nothing to \ 26 | // be flushed) 27 | let oid = object.as_str(); 28 | 29 | if let Ok(iid_value) = kv_action.get_oid_to_iid(oid) { 30 | let mut count_flushed = 0; 31 | 32 | if let Some(iid) = iid_value { 33 | // Resolve terms associated to IID 34 | let iid_terms = { 35 | if let Ok(iid_terms_value) = kv_action.get_iid_to_terms(iid) { 36 | iid_terms_value.unwrap_or_default() 37 | } else { 38 | error!("failed getting flusho executor iid-to-terms"); 39 | 40 | Vec::new() 41 | } 42 | }; 43 | 44 | // Flush bucket (batch operation, as it is shared w/ other executors) 45 | if let Ok(batch_count) = kv_action.batch_flush_bucket(iid, oid, &iid_terms) 46 | { 47 | count_flushed += batch_count; 48 | } else { 49 | error!("failed executing batch-flush-bucket in flusho executor"); 50 | } 51 | } 52 | 53 | return Ok(count_flushed); 54 | } else { 55 | error!("failed getting flusho executor oid-to-iid"); 56 | } 57 | } 58 | } 59 | 60 | Err(()) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/executor/list.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2022, Troy Kohler 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use crate::query::types::{QuerySearchID, QuerySearchLimit, QuerySearchOffset}; 8 | use crate::store::fst::StoreFSTActionBuilder; 9 | use crate::store::fst::StoreFSTPool; 10 | use crate::store::item::StoreItem; 11 | 12 | pub struct ExecutorList; 13 | 14 | impl ExecutorList { 15 | pub fn execute( 16 | store: 
// Evaluates `$operation` and discards its success value. On `Err`, the error is logged (with
// its `Debug` representation) and execution continues; nothing is propagated to the caller.
#[macro_export]
macro_rules! executor_ensure_op {
    ($operation:expr) => {
        match $operation {
            Ok(_) => {}
            Err(err) => error!("executor operation failed: {:?}", err),
        }
    };
}

// Takes the read side of the lock held by `$store`. The store is cloned first, then the lock is
// only acquired if the clone holds an inner value (`as_ref().map(..)`), so nothing is locked for
// an empty store. Both `let` bindings expand into the calling block, hence the guard is held
// until that block ends.
// NOTE(review): `unwrap()` panics if acquiring the lock returns an error (presumably a poisoned
// lock; the lock type is not visible from here).
#[macro_export]
macro_rules! executor_kv_lock_read {
    ($store:ident) => {
        let kv_store_reference = $store.clone();

        let _kv_store_lock = kv_store_reference
            .as_ref()
            .map(|inner| inner.lock.read().unwrap());
    };
}

// Same as `executor_kv_lock_read!`, but takes the write side of the lock held by `$store`.
#[macro_export]
macro_rules! executor_kv_lock_write {
    ($store:ident) => {
        let kv_store_reference = $store.clone();

        let _kv_store_lock = kv_store_reference
            .as_ref()
            .map(|inner| inner.lock.write().unwrap());
    };
}

// Takes the read side of the global `STORE_ACCESS_LOCK`, and keeps the guard bound in the
// calling block (as `_kv_access`) until that block ends. Panics if the lock cannot be acquired.
#[macro_export]
macro_rules! general_kv_access_lock_read {
    () => {
        use crate::store::kv::STORE_ACCESS_LOCK;

        let _kv_access = STORE_ACCESS_LOCK.read().unwrap();
    };
}

// Takes the write side of the global `STORE_ACCESS_LOCK`, and keeps the guard bound in the
// calling block (as `_kv_access`) until that block ends. Panics if the lock cannot be acquired.
#[macro_export]
macro_rules! general_kv_access_lock_write {
    () => {
        use crate::store::kv::STORE_ACCESS_LOCK;

        let _kv_access = STORE_ACCESS_LOCK.write().unwrap();
    };
}

// Takes the read side of the global `GRAPH_ACCESS_LOCK`, and keeps the guard bound in the
// calling block (as `_fst_access`) until that block ends. Panics if the lock cannot be acquired.
#[macro_export]
macro_rules! general_fst_access_lock_read {
    () => {
        use crate::store::fst::GRAPH_ACCESS_LOCK;

        let _fst_access = GRAPH_ACCESS_LOCK.read().unwrap();
    };
}

// Takes the write side of the global `GRAPH_ACCESS_LOCK`, and keeps the guard bound in the
// calling block (as `_fst_access`) until that block ends. Panics if the lock cannot be acquired.
#[macro_export]
macro_rules! general_fst_access_lock_write {
    () => {
        use crate::store::fst::GRAPH_ACCESS_LOCK;

        let _fst_access = GRAPH_ACCESS_LOCK.write().unwrap();
    };
}
ExecutorSuggest; 13 | 14 | impl ExecutorSuggest { 15 | pub fn execute<'a>( 16 | store: StoreItem<'a>, 17 | _event_id: QuerySearchID, 18 | mut lexer: TokenLexer<'a>, 19 | limit: QuerySearchLimit, 20 | ) -> Result>, ()> { 21 | if let StoreItem(collection, Some(bucket), None) = store { 22 | // Important: acquire graph access read lock, and reference it in context. This \ 23 | // prevents the graph from being erased while using it in this block. 24 | general_fst_access_lock_read!(); 25 | 26 | if let Ok(fst_store) = StoreFSTPool::acquire(collection, bucket) { 27 | let fst_action = StoreFSTActionBuilder::access(fst_store); 28 | 29 | if let (Some(word), None) = (lexer.next(), lexer.next()) { 30 | debug!("running suggest on word: {}", word.0); 31 | 32 | return Ok(fst_action.suggest_words(&word.0, limit as usize, None)); 33 | } 34 | } 35 | } 36 | 37 | Err(()) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/lexer/mod.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | mod stopwords; 8 | 9 | pub mod ranges; 10 | pub mod token; 11 | -------------------------------------------------------------------------------- /src/lexer/ranges.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use std::fmt; 8 | use whatlang::{detect_script, Script}; 9 | 10 | struct LexerRange; 11 | 12 | #[derive(PartialEq, Debug)] 13 | pub struct LexerRegexRange(&'static [(char, char)]); 14 | 15 | const RANGE_LATIN: &[(char, char)] = &[('\u{0000}', '\u{024F}')]; 16 | const RANGE_CYRILLIC: &[(char, char)] = &[('\u{0400}', '\u{052F}')]; 
// Per-script code point ranges. Each entry is a `(first, last)` pair of `char`s, and a script
// may span several pairs. `LexerRegexRange::write_to` emits every pair as a `first-last` regex
// class range, so both bounds belong to the range.

// Arabic + Arabic Supplement
const RANGE_ARABIC: &[(char, char)] = &[('\u{0600}', '\u{06FF}'), ('\u{0750}', '\u{077F}')];
const RANGE_ARMENIAN: &[(char, char)] = &[('\u{0530}', '\u{058F}')];
const RANGE_DEVANAGARI: &[(char, char)] = &[('\u{0900}', '\u{097F}')];
const RANGE_HIRAGANA: &[(char, char)] = &[('\u{3040}', '\u{309F}')];
// Katakana + Katakana Phonetic Extensions
const RANGE_KATAKANA: &[(char, char)] = &[('\u{30A0}', '\u{30FF}'), ('\u{31F0}', '\u{31FF}')];
// Ethiopic (with its supplement) + Ethiopic Extended
const RANGE_ETHIOPIC: &[(char, char)] = &[('\u{1200}', '\u{139F}'), ('\u{2D80}', '\u{2DDF}')];
const RANGE_HEBREW: &[(char, char)] = &[('\u{0590}', '\u{05FF}')];
const RANGE_BENGALI: &[(char, char)] = &[('\u{0980}', '\u{09FF}')];
// Georgian + Georgian Supplement
const RANGE_GEORGIAN: &[(char, char)] = &[('\u{10A0}', '\u{10FF}'), ('\u{2D00}', '\u{2D2F}')];
// CJK Unified Ideographs, followed by its extensions (A, then two supplementary-plane spans)
const RANGE_MANDARIN: &[(char, char)] = &[
    ('\u{4E00}', '\u{9FFF}'),
    ('\u{3400}', '\u{4DBF}'),
    ('\u{20000}', '\u{2A6DF}'),
    ('\u{2A700}', '\u{2CEAF}'),
];
// Hangul Jamo + Hangul Compatibility Jamo
// NOTE(review): the Hangul Syllables block (U+AC00 to U+D7AF) is not part of these ranges --
// confirm whether leaving precomposed syllables out is intended.
const RANGE_HANGUL: &[(char, char)] = &[('\u{1100}', '\u{11FF}'), ('\u{3130}', '\u{318F}')];
// Greek and Coptic + Greek Extended
const RANGE_GREEK: &[(char, char)] = &[('\u{0370}', '\u{03FF}'), ('\u{1F00}', '\u{1FFF}')];
const RANGE_KANNADA: &[(char, char)] = &[('\u{0C80}', '\u{0CFF}')];
const RANGE_TAMIL: &[(char, char)] = &[('\u{0B80}', '\u{0BFF}')];
const RANGE_THAI: &[(char, char)] = &[('\u{0E00}', '\u{0E7F}')];
const RANGE_GUJARATI: &[(char, char)] = &[('\u{0A80}', '\u{0AFF}')];
const RANGE_GURMUKHI: &[(char, char)] = &[('\u{0A00}', '\u{0A7F}')];
const RANGE_TELUGU: &[(char, char)] = &[('\u{0C00}', '\u{0C7F}')];
const RANGE_MALAYALAM: &[(char, char)] = &[('\u{0D00}', '\u{0D7F}')];
const RANGE_ORIYA: &[(char, char)] = &[('\u{0B00}', '\u{0B7F}')];
const RANGE_MYANMAR: &[(char, char)] = &[('\u{1000}', '\u{109F}')];
const RANGE_SINHALA: &[(char, char)] = &[('\u{0D80}', '\u{0DFF}')];
// Khmer + Khmer Symbols
const RANGE_KHMER: &[(char, char)] = &[('\u{1780}', '\u{17FF}'), ('\u{19E0}', '\u{19FF}')];
| pub fn from(text: &str) -> Option<&'static [(char, char)]> { 48 | detect_script(text).map(|script| match script { 49 | Script::Latin => RANGE_LATIN, 50 | Script::Cyrillic => RANGE_CYRILLIC, 51 | Script::Arabic => RANGE_ARABIC, 52 | Script::Armenian => RANGE_ARMENIAN, 53 | Script::Devanagari => RANGE_DEVANAGARI, 54 | Script::Hiragana => RANGE_HIRAGANA, 55 | Script::Katakana => RANGE_KATAKANA, 56 | Script::Ethiopic => RANGE_ETHIOPIC, 57 | Script::Hebrew => RANGE_HEBREW, 58 | Script::Bengali => RANGE_BENGALI, 59 | Script::Georgian => RANGE_GEORGIAN, 60 | Script::Mandarin => RANGE_MANDARIN, 61 | Script::Hangul => RANGE_HANGUL, 62 | Script::Greek => RANGE_GREEK, 63 | Script::Kannada => RANGE_KANNADA, 64 | Script::Tamil => RANGE_TAMIL, 65 | Script::Thai => RANGE_THAI, 66 | Script::Gujarati => RANGE_GUJARATI, 67 | Script::Gurmukhi => RANGE_GURMUKHI, 68 | Script::Telugu => RANGE_TELUGU, 69 | Script::Malayalam => RANGE_MALAYALAM, 70 | Script::Oriya => RANGE_ORIYA, 71 | Script::Myanmar => RANGE_MYANMAR, 72 | Script::Sinhala => RANGE_SINHALA, 73 | Script::Khmer => RANGE_KHMER, 74 | }) 75 | } 76 | } 77 | 78 | impl LexerRegexRange { 79 | pub fn from(text: &str) -> Option { 80 | LexerRange::from(text).map(LexerRegexRange) 81 | } 82 | 83 | pub fn write_to(&self, formatter: &mut W) -> Result<(), fmt::Error> { 84 | // Format range to regex range 85 | formatter.write_char('[')?; 86 | 87 | for range in self.0 { 88 | write!( 89 | formatter, 90 | "\\x{{{:X}}}-\\x{{{:X}}}", 91 | range.0 as u32, range.1 as u32 92 | )?; 93 | } 94 | 95 | formatter.write_char(']')?; 96 | 97 | Ok(()) 98 | } 99 | } 100 | 101 | impl Default for LexerRegexRange { 102 | fn default() -> Self { 103 | LexerRegexRange(RANGE_LATIN) 104 | } 105 | } 106 | 107 | #[cfg(test)] 108 | mod tests { 109 | use super::*; 110 | 111 | #[test] 112 | fn it_gives_ranges() { 113 | assert_eq!(LexerRange::from("fox"), Some(RANGE_LATIN)); 114 | assert_eq!(LexerRange::from("快狐跨懒狗"), Some(RANGE_MANDARIN)); 115 | 
// Benchmarks for script range detection. Only compiled for test builds that have the
// `benchmark` feature enabled, as they rely on the `test` crate (`test::Bencher`).
#[cfg(all(feature = "benchmark", test))]
mod benches {
    extern crate test;

    use super::*;
    use test::Bencher;

    // Raw range lookup, on a Latin input
    #[bench]
    fn bench_give_ranges_latin(b: &mut Bencher) {
        b.iter(|| LexerRange::from("fox"));
    }

    // Raw range lookup, on a Mandarin input
    #[bench]
    fn bench_give_ranges_mandarin(b: &mut Bencher) {
        b.iter(|| LexerRange::from("快狐跨懒狗"));
    }

    // Raw range lookup, on a Cyrillic input
    #[bench]
    fn bench_give_ranges_cyrillic(b: &mut Bencher) {
        b.iter(|| LexerRange::from("Доброе утро."));
    }

    // Same lookup as the Latin one, going through the `LexerRegexRange` wrapper
    #[bench]
    fn bench_give_regex_range_latin(b: &mut Bencher) {
        b.iter(|| LexerRegexRange::from("fox"));
    }
}
// Generates a `$method()` function that runs `$managed_fn::build().run()` in a dedicated
// thread named after `$thread_name`, and blocks on it.
//
// If the thread cannot be spawned, or if it panics, the failure is logged and the thread is set
// up again after a 1 second pause. `$method()` only returns once the worker thread terminated
// without panicking. `$name` is only used in log messages.
//
// Notice: restarts are done from a loop rather than by having `$method()` call itself, so that
// repeated crashes do not nest stack frames.
macro_rules! gen_spawn_managed {
    ($name:expr, $method:ident, $thread_name:ident, $managed_fn:ident) => {
        fn $method() {
            loop {
                debug!("spawn managed thread: {}", $name);

                let worker = thread::Builder::new()
                    .name($thread_name.to_string())
                    .spawn(|| $managed_fn::build().run());

                // Block on worker thread (join it); a thread that failed to spawn is
                // handled the same way as a thread that crashed
                let has_error = match worker {
                    Ok(worker_thread) => worker_thread.join().is_err(),
                    Err(_) => true,
                };

                // Worker thread exited normally? We are done.
                if !has_error {
                    break;
                }

                error!("managed thread crashed ({}), setting it up again", $name);

                // Prevents thread start loop floods
                thread::sleep(Duration::from_secs(1));
            }
        }
    };
}
{ 89 | static ref APP_ARGS: AppArgs = make_app_args(); 90 | static ref APP_CONF: Config = ConfigReader::make(); 91 | } 92 | 93 | gen_spawn_managed!( 94 | "channel", 95 | spawn_channel, 96 | THREAD_NAME_CHANNEL_MASTER, 97 | ChannelListenBuilder 98 | ); 99 | gen_spawn_managed!("tasker", spawn_tasker, THREAD_NAME_TASKER, TaskerBuilder); 100 | 101 | fn make_app_args() -> AppArgs { 102 | let matches = App::new(clap::crate_name!()) 103 | .version(clap::crate_version!()) 104 | .author(clap::crate_authors!()) 105 | .about(clap::crate_description!()) 106 | .arg( 107 | Arg::new("config") 108 | .short('c') 109 | .long("config") 110 | .help("Path to configuration file") 111 | .default_value("./config.cfg") 112 | .takes_value(true), 113 | ) 114 | .get_matches(); 115 | 116 | // Generate owned app arguments 117 | AppArgs { 118 | config: String::from(matches.value_of("config").expect("invalid config value")), 119 | } 120 | } 121 | 122 | fn ensure_states() { 123 | // Ensure all statics are valid (a `deref` is enough to lazily initialize them) 124 | let (_, _) = (APP_ARGS.deref(), APP_CONF.deref()); 125 | 126 | // Ensure per-module states 127 | ensure_states_channel_statistics(); 128 | } 129 | 130 | fn main() { 131 | let _logger = ConfigLogger::init( 132 | LevelFilter::from_str(&APP_CONF.server.log_level).expect("invalid log level"), 133 | ); 134 | 135 | let shutdown_signal = ShutdownSignal::new(); 136 | 137 | info!("starting up"); 138 | 139 | // Ensure all states are bound 140 | ensure_states(); 141 | 142 | // Spawn tasker (background thread) 143 | thread::spawn(spawn_tasker); 144 | 145 | // Spawn channel (foreground thread) 146 | thread::spawn(spawn_channel); 147 | 148 | info!("started"); 149 | 150 | shutdown_signal.at_exit(move |signal| { 151 | info!("stopping gracefully (got signal: {})", signal); 152 | 153 | // Teardown Sonic Channel 154 | ChannelListen::teardown(); 155 | 156 | // Perform a KV flush (ensures all in-memory changes are synced on-disk before shutdown) 157 | 
// A query, with the data it carries. There is one variant per operation, and every variant
// starts with the `StoreItem` it targets.
// NOTE(review): each variant presumably maps to the executor of the same name under
// `executor/` -- the dispatching code is not visible from here, confirm.
pub enum Query<'a> {
    // Target store, query identifier, token lexer, result limit and result offset
    Search(
        StoreItem<'a>,
        QuerySearchID<'a>,
        TokenLexer<'a>,
        QuerySearchLimit,
        QuerySearchOffset,
    ),
    // Target store, query identifier, token lexer and result limit (no offset)
    Suggest(
        StoreItem<'a>,
        QuerySearchID<'a>,
        TokenLexer<'a>,
        QuerySearchLimit,
    ),
    // Target store, query identifier, result limit and result offset (no lexer)
    List(
        StoreItem<'a>,
        QuerySearchID<'a>,
        QuerySearchLimit,
        QuerySearchOffset,
    ),
    // Target store and token lexer
    Push(StoreItem<'a>, TokenLexer<'a>),
    // Target store and token lexer
    Pop(StoreItem<'a>, TokenLexer<'a>),
    // Target store only
    Count(StoreItem<'a>),
    // Target store only
    FlushC(StoreItem<'a>),
    // Target store only
    FlushB(StoreItem<'a>),
    // Target store only
    FlushO(StoreItem<'a>),
}
lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use whatlang::Lang; 8 | 9 | #[derive(Debug, PartialEq)] 10 | pub enum QueryGenericLang { 11 | Enabled(Lang), 12 | Disabled, 13 | } 14 | 15 | pub type QuerySearchID<'a> = &'a str; 16 | pub type QuerySearchLimit = u16; 17 | pub type QuerySearchOffset = u32; 18 | 19 | pub type QueryMetaData = ( 20 | Option, 21 | Option, 22 | Option, 23 | ); 24 | 25 | pub type ListMetaData = (Option, Option); 26 | 27 | impl QueryGenericLang { 28 | pub fn from_value(value: &str) -> Option { 29 | if value == "none" { 30 | Some(QueryGenericLang::Disabled) 31 | } else { 32 | Lang::from_code(value).map(QueryGenericLang::Enabled) 33 | } 34 | } 35 | } 36 | 37 | #[cfg(test)] 38 | mod tests { 39 | use super::*; 40 | 41 | #[test] 42 | fn it_parses_generic_lang_from_value() { 43 | assert_eq!( 44 | QueryGenericLang::from_value("none"), 45 | Some(QueryGenericLang::Disabled) 46 | ); 47 | assert_eq!( 48 | QueryGenericLang::from_value("fra"), 49 | Some(QueryGenericLang::Enabled(Lang::Fra)) 50 | ); 51 | assert_eq!(QueryGenericLang::from_value("xxx"), None); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/stopwords/afr.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_AFR: &[&str] = &[ 8 | "'n", "aan", "af", "al", "as", "baie", "by", "daar", "dag", "dat", "die", "dit", "een", "ek", 9 | "en", "gaan", "gesê", "haar", "het", "hom", "hulle", "hy", "in", "is", "jou", "jy", "kan", 10 | "kom", "ma", "maar", "met", "my", "na", "nie", "om", "ons", "op", "saam", "sal", "se", "sien", 11 | "so", "sy", "te", "toe", "uit", "van", "vir", "was", "wat", "ʼn", 12 | ]; 13 | 
// Stopwords for Azerbaijani (`aze`).
// Notice: entries must not carry any leading or trailing whitespace, as an entry with
// surrounding whitespace can never be equal to a whitespace-free word.
pub static STOPWORDS_AZE: &[&str] = &[
    "a",
    "ad",
    "altı",
    "altmış",
    "amma",
    "arasında",
    "artıq",
    "ay",
    "az",
    "bax",
    "belə",
    "bəli",
    "bəlkə",
    "beş",
    "bəy",
    "bəzən",
    "bəzi",
    "bilər",
    "bir",
    "biraz",
    "biri",
    "birşey",
    "biz",
    "bizim",
    "bizlər",
    "bu",
    "buna",
    "bundan",
    "bunların",
    "bunu",
    "bunun",
    "buradan",
    "bütün",
    "ci",
    "cı",
    "çox",
    "cu",
    "cü",
    "çünki",
    "da",
    "daha",
    "də",
    "dedi",
    "dək",
    "dən",
    "dəqiqə",
    "deyil",
    "dir",
    "doqquz",
    "doqsan",
    "dörd",
    "düz",
    "ə",
    "edən",
    "edir",
    "əgər",
    "əlbəttə",
    "elə",
    "əlli",
    "ən",
    "əslində",
    "et",
    "etdi",
    "etmə",
    "etmək",
    "faiz",
    "gilə",
    "görə",
    "ha",
    "haqqında",
    "harada",
    "hə",
    "heç",
    "həm",
    "həmin",
    "həmişə",
    "hər",
    "ı",
    "idi",
    "iki",
    "il",
    "ildə",
    "ilə",
    "ilk",
    "in",
    "indi",
    "isə",
    "istifadə",
    "iyirmi",
    "ki",
    "kim",
    "kimə",
    "kimi",
    "lakin",
    "lap",
    "məhz",
    "mən",
    "mənə",
    "mirşey",
    "nə",
    "nəhayət",
    "niyə",
    "o",
    "obirisi",
    "of",
    "olan",
    "olar",
    "olaraq",
    "oldu",
    "olduğu",
    "olmadı",
    "olmaz",
    "olmuşdur",
    "olsun",
    "olur",
    "on",
    "ona",
    "ondan",
    "onlar",
    "onlardan",
    "onların",
    "onsuzda",
    "onu",
    "onun",
    "oradan",
    "otuz",
    "öz",
    "özü",
    "qarşı",
    "qədər",
    "qırx",
    "saat",
    "sadəcə",
    "saniyə",
    "səhv",
    "səkkiz",
    "səksən",
    "sən",
    "sənə",
    "sənin",
    "siz",
    "sizin",
    "sizlər",
    "sonra",
    "təəssüf",
    "ü",
    "üç",
    "üçün",
    "var",
    "və",
    "xan",
    "xanım",
    "xeyr",
    "ya",
    "yalnız",
    "yaxşı",
    "yeddi",
    "yenə",
    "yəni",
    "yetmiş",
    "yox",
    "yoxdur",
    "yoxsa",
    "yüz",
    "zaman",
];
stopwords for this language yet. 8 | pub static STOPWORDS_BEL: &[&str] = &[]; 9 | -------------------------------------------------------------------------------- /src/stopwords/ben.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_BEN: &[&str] = &[ 8 | "অতএব", 9 | "অথচ", 10 | "অথবা", 11 | "অনুযায়ী", 12 | "অনেক", 13 | "অনেকে", 14 | "অনেকেই", 15 | "অন্তত", 16 | "অন্য", 17 | "অবধি", 18 | "অবশ্য", 19 | "অর্থাত", 20 | "আই", 21 | "আগামী", 22 | "আগে", 23 | "আগেই", 24 | "আছে", 25 | "আজ", 26 | "আদ্যভাগে", 27 | "আপনার", 28 | "আপনি", 29 | "আবার", 30 | "আমরা", 31 | "আমাকে", 32 | "আমাদের", 33 | "আমার", 34 | "আমি", 35 | "আর", 36 | "আরও", 37 | "ই", 38 | "ইত্যাদি", 39 | "ইহা", 40 | "উচিত", 41 | "উত্তর", 42 | "উনি", 43 | "উপর", 44 | "উপরে", 45 | "এ", 46 | "এঁদের", 47 | "এঁরা", 48 | "এই", 49 | "একই", 50 | "একটি", 51 | "একবার", 52 | "একে", 53 | "এক্", 54 | "এখন", 55 | "এখনও", 56 | "এখানে", 57 | "এখানেই", 58 | "এটা", 59 | "এটাই", 60 | "এটি", 61 | "এত", 62 | "এতটাই", 63 | "এতে", 64 | "এদের", 65 | "এব", 66 | "এবং", 67 | "এবার", 68 | "এমন", 69 | "এমনকী", 70 | "এমনি", 71 | "এর", 72 | "এরা", 73 | "এল", 74 | "এস", 75 | "এসে", 76 | "ঐ", 77 | "ও", 78 | "ওঁদের", 79 | "ওঁর", 80 | "ওঁরা", 81 | "ওই", 82 | "ওকে", 83 | "ওখানে", 84 | "ওদের", 85 | "ওর", 86 | "ওরা", 87 | "কখনও", 88 | "কত", 89 | "কবে", 90 | "কমনে", 91 | "কয়েক", 92 | "কয়েকটি", 93 | "করছে", 94 | "করছেন", 95 | "করতে", 96 | "করবে", 97 | "করবেন", 98 | "করলে", 99 | "করলেন", 100 | "করা", 101 | "করাই", 102 | "করায়", 103 | "করার", 104 | "করি", 105 | "করিতে", 106 | "করিয়া", 107 | "করিয়ে", 108 | "করে", 109 | "করেই", 110 | "করেছিলেন", 111 | "করেছে", 112 | "করেছেন", 113 | "করেন", 114 | "কাউকে", 115 | "কাছ", 116 | "কাছে", 117 | "কাজ", 118 | "কাজে", 119 | "কারও", 120 | "কারণ", 121 | "কি", 122 | "কিংবা", 123 | 
"কিছু", 124 | "কিছুই", 125 | "কিন্তু", 126 | "কী", 127 | "কে", 128 | "কেউ", 129 | "কেউই", 130 | "কেখা", 131 | "কেন", 132 | "কোটি", 133 | "কোন", 134 | "কোনও", 135 | "কোনো", 136 | "ক্ষেত্রে", 137 | "কয়েক", 138 | "খুব", 139 | "গিয়ে", 140 | "গিয়েছে", 141 | "গিয়ে", 142 | "গুলি", 143 | "গেছে", 144 | "গেল", 145 | "গেলে", 146 | "গোটা", 147 | "চলে", 148 | "চান", 149 | "চায়", 150 | "চার", 151 | "চালু", 152 | "চেয়ে", 153 | "চেষ্টা", 154 | "ছাড়া", 155 | "ছাড়াও", 156 | "ছিল", 157 | "ছিলেন", 158 | "জন", 159 | "জনকে", 160 | "জনের", 161 | "জন্য", 162 | "জন্যওজে", 163 | "জানতে", 164 | "জানা", 165 | "জানানো", 166 | "জানায়", 167 | "জানিয়ে", 168 | "জানিয়েছে", 169 | "জে", 170 | "জ্নজন", 171 | "টি", 172 | "ঠিক", 173 | "তখন", 174 | "তত", 175 | "তথা", 176 | "তবু", 177 | "তবে", 178 | "তা", 179 | "তাঁকে", 180 | "তাঁদের", 181 | "তাঁর", 182 | "তাঁরা", 183 | "তাঁাহারা", 184 | "তাই", 185 | "তাও", 186 | "তাকে", 187 | "তাতে", 188 | "তাদের", 189 | "তার", 190 | "তারপর", 191 | "তারা", 192 | "তারৈ", 193 | "তাহলে", 194 | "তাহা", 195 | "তাহাতে", 196 | "তাহার", 197 | "তিনঐ", 198 | "তিনি", 199 | "তিনিও", 200 | "তুমি", 201 | "তুলে", 202 | "তেমন", 203 | "তো", 204 | "তোমার", 205 | "থাকবে", 206 | "থাকবেন", 207 | "থাকা", 208 | "থাকায়", 209 | "থাকে", 210 | "থাকেন", 211 | "থেকে", 212 | "থেকেই", 213 | "থেকেও", 214 | "দিকে", 215 | "দিতে", 216 | "দিন", 217 | "দিয়ে", 218 | "দিয়েছে", 219 | "দিয়েছেন", 220 | "দিলেন", 221 | "দু", 222 | "দুই", 223 | "দুটি", 224 | "দুটো", 225 | "দেওয়া", 226 | "দেওয়ার", 227 | "দেওয়া", 228 | "দেখতে", 229 | "দেখা", 230 | "দেখে", 231 | "দেন", 232 | "দেয়", 233 | "দ্বারা", 234 | "ধরা", 235 | "ধরে", 236 | "ধামার", 237 | "নতুন", 238 | "নয়", 239 | "না", 240 | "নাই", 241 | "নাকি", 242 | "নাগাদ", 243 | "নানা", 244 | "নিজে", 245 | "নিজেই", 246 | "নিজেদের", 247 | "নিজের", 248 | "নিতে", 249 | "নিয়ে", 250 | "নিয়ে", 251 | "নেই", 252 | "নেওয়া", 253 | "নেওয়ার", 254 | "নেওয়া", 255 | "নয়", 256 | "পক্ষে", 257 | "পর", 258 | "পরে", 259 | "পরেই", 260 | "পরেও", 261 | "পর্যন্ত", 262 | 
"পাওয়া", 263 | "পাচ", 264 | "পারি", 265 | "পারে", 266 | "পারেন", 267 | "পি", 268 | "পেয়ে", 269 | "পেয়্র্", 270 | "প্রতি", 271 | "প্রথম", 272 | "প্রভৃতি", 273 | "প্রযন্ত", 274 | "প্রাথমিক", 275 | "প্রায়", 276 | "প্রায়", 277 | "ফলে", 278 | "ফিরে", 279 | "ফের", 280 | "বক্তব্য", 281 | "বদলে", 282 | "বন", 283 | "বরং", 284 | "বলতে", 285 | "বলল", 286 | "বললেন", 287 | "বলা", 288 | "বলে", 289 | "বলেছেন", 290 | "বলেন", 291 | "বসে", 292 | "বহু", 293 | "বা", 294 | "বাদে", 295 | "বার", 296 | "বি", 297 | "বিনা", 298 | "বিভিন্ন", 299 | "বিশেষ", 300 | "বিষয়টি", 301 | "বেশ", 302 | "বেশি", 303 | "ব্যবহার", 304 | "ব্যাপারে", 305 | "ভাবে", 306 | "ভাবেই", 307 | "মতো", 308 | "মতোই", 309 | "মধ্যভাগে", 310 | "মধ্যে", 311 | "মধ্যেই", 312 | "মধ্যেও", 313 | "মনে", 314 | "মাত্র", 315 | "মাধ্যমে", 316 | "মোট", 317 | "মোটেই", 318 | "যখন", 319 | "যত", 320 | "যতটা", 321 | "যথেষ্ট", 322 | "যদি", 323 | "যদিও", 324 | "যা", 325 | "যাঁর", 326 | "যাঁরা", 327 | "যাওয়া", 328 | "যাওয়ার", 329 | "যাওয়া", 330 | "যাকে", 331 | "যাচ্ছে", 332 | "যাতে", 333 | "যাদের", 334 | "যান", 335 | "যাবে", 336 | "যায়", 337 | "যার", 338 | "যারা", 339 | "যিনি", 340 | "যে", 341 | "যেখানে", 342 | "যেতে", 343 | "যেন", 344 | "যেমন", 345 | "র", 346 | "রকম", 347 | "রয়েছে", 348 | "রাখা", 349 | "রেখে", 350 | "লক্ষ", 351 | "শুধু", 352 | "শুরু", 353 | "সঙ্গে", 354 | "সঙ্গেও", 355 | "সব", 356 | "সবার", 357 | "সমস্ত", 358 | "সম্প্রতি", 359 | "সহ", 360 | "সহিত", 361 | "সাধারণ", 362 | "সামনে", 363 | "সি", 364 | "সুতরাং", 365 | "সে", 366 | "সেই", 367 | "সেখান", 368 | "সেখানে", 369 | "সেটা", 370 | "সেটাই", 371 | "সেটাও", 372 | "সেটি", 373 | "স্পষ্ট", 374 | "স্বয়ং", 375 | "হইতে", 376 | "হইবে", 377 | "হইয়া", 378 | "হওয়া", 379 | "হওয়ায়", 380 | "হওয়ার", 381 | "হচ্ছে", 382 | "হত", 383 | "হতে", 384 | "হতেই", 385 | "হন", 386 | "হবে", 387 | "হবেন", 388 | "হয়", 389 | "হয়তো", 390 | "হয়নি", 391 | "হয়ে", 392 | "হয়েই", 393 | "হয়েছিল", 394 | "হয়েছে", 395 | "হয়েছেন", 396 | "হল", 397 | "হলে", 398 | "হলেই", 399 | "হলেও", 400 | "হলো", 
401 | "হাজার", 402 | "হিসাবে", 403 | "হৈলে", 404 | "হোক", 405 | "হয়", 406 | ]; 407 | -------------------------------------------------------------------------------- /src/stopwords/dan.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_DAN: &[&str] = &[ 8 | "ad", "af", "aldrig", "alle", "alt", "anden", "andet", "andre", "at", "bare", "begge", "blev", 9 | "blive", "bliver", "da", "de", "dem", "den", "denne", "der", "deres", "det", "dette", "dig", 10 | "din", "dine", "disse", "dit", "dog", "du", "efter", "ej", "eller", "en", "end", "ene", 11 | "eneste", "enhver", "er", "et", "far", "fem", "fik", "fire", "flere", "fleste", "for", "fordi", 12 | "forrige", "fra", "få", "får", "før", "god", "godt", "ham", "han", "hans", "har", "havde", 13 | "have", "hej", "helt", "hende", "hendes", "her", "hos", "hun", "hvad", "hvem", "hver", 14 | "hvilken", "hvis", "hvor", "hvordan", "hvorfor", "hvornår", "i", "ikke", "ind", "ingen", 15 | "intet", "ja", "jeg", "jer", "jeres", "jo", "kan", "kom", "komme", "kommer", "kun", "kunne", 16 | "lad", "lav", "lidt", "lige", "lille", "man", "mand", "mange", "med", "meget", "men", "mens", 17 | "mere", "mig", "min", "mine", "mit", "mod", "må", "ned", "nej", "ni", "nogen", "noget", 18 | "nogle", "nu", "ny", "nyt", "når", "nær", "næste", "næsten", "og", "også", "okay", "om", "op", 19 | "os", "otte", "over", "på", "se", "seks", "selv", "ser", "ses", "sig", "sige", "sin", "sine", 20 | "sit", "skal", "skulle", "som", "stor", "store", "syv", "så", "sådan", "tag", "tage", "thi", 21 | "ti", "til", "to", "tre", "ud", "under", "var", "ved", "vi", "vil", "ville", "vor", "vores", 22 | "være", "været", 23 | ]; 24 | -------------------------------------------------------------------------------- /src/stopwords/ell.rs: 
-------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_ELL: &[&str] = &[ 8 | "αλλα", 9 | "αν", 10 | "αντι", 11 | "απο", 12 | "αυτα", 13 | "αυτεσ", 14 | "αυτη", 15 | "αυτο", 16 | "αυτοι", 17 | "αυτοσ", 18 | "αυτουσ", 19 | "αυτων", 20 | "αἱ", 21 | "αἳ", 22 | "αἵ", 23 | "αὐτόσ", 24 | "αὐτὸς", 25 | "αὖ", 26 | "γάρ", 27 | "γα", 28 | "γα^", 29 | "γε", 30 | "για", 31 | "γοῦν", 32 | "γὰρ", 33 | "δ'", 34 | "δέ", 35 | "δή", 36 | "δαί", 37 | "δαίσ", 38 | "δαὶ", 39 | "δαὶς", 40 | "δε", 41 | "δεν", 42 | "δι'", 43 | "διά", 44 | "διὰ", 45 | "δὲ", 46 | "δὴ", 47 | "δ’", 48 | "εαν", 49 | "ειμαι", 50 | "ειμαστε", 51 | "ειναι", 52 | "εισαι", 53 | "ειστε", 54 | "εκεινα", 55 | "εκεινεσ", 56 | "εκεινη", 57 | "εκεινο", 58 | "εκεινοι", 59 | "εκεινοσ", 60 | "εκεινουσ", 61 | "εκεινων", 62 | "ενω", 63 | "επ", 64 | "επι", 65 | "εἰ", 66 | "εἰμί", 67 | "εἰμὶ", 68 | "εἰς", 69 | "εἰσ", 70 | "εἴ", 71 | "εἴμι", 72 | "εἴτε", 73 | "η", 74 | "θα", 75 | "ισωσ", 76 | "κ", 77 | "καί", 78 | "καίτοι", 79 | "καθ", 80 | "και", 81 | "κατ", 82 | "κατά", 83 | "κατα", 84 | "κατὰ", 85 | "καὶ", 86 | "κι", 87 | "κἀν", 88 | "κἂν", 89 | "μέν", 90 | "μή", 91 | "μήτε", 92 | "μα", 93 | "με", 94 | "μεθ", 95 | "μετ", 96 | "μετά", 97 | "μετα", 98 | "μετὰ", 99 | "μη", 100 | "μην", 101 | "μἐν", 102 | "μὲν", 103 | "μὴ", 104 | "μὴν", 105 | "να", 106 | "ο", 107 | "οι", 108 | "ομωσ", 109 | "οπωσ", 110 | "οσο", 111 | "οτι", 112 | "οἱ", 113 | "οἳ", 114 | "οἷς", 115 | "οὐ", 116 | "οὐδ", 117 | "οὐδέ", 118 | "οὐδείσ", 119 | "οὐδεὶς", 120 | "οὐδὲ", 121 | "οὐδὲν", 122 | "οὐκ", 123 | "οὐχ", 124 | "οὐχὶ", 125 | "οὓς", 126 | "οὔτε", 127 | "οὕτω", 128 | "οὕτως", 129 | "οὕτωσ", 130 | "οὖν", 131 | "οὗ", 132 | "οὗτος", 133 | "οὗτοσ", 134 | "παρ", 135 | "παρά", 136 | "παρα", 137 | "παρὰ", 138 | "περί", 139 | "περὶ", 140 | 
"ποια", 141 | "ποιεσ", 142 | "ποιο", 143 | "ποιοι", 144 | "ποιοσ", 145 | "ποιουσ", 146 | "ποιων", 147 | "ποτε", 148 | "που", 149 | "ποῦ", 150 | "προ", 151 | "προσ", 152 | "πρόσ", 153 | "πρὸ", 154 | "πρὸς", 155 | "πως", 156 | "πωσ", 157 | "σε", 158 | "στη", 159 | "στην", 160 | "στο", 161 | "στον", 162 | "σόσ", 163 | "σύ", 164 | "σύν", 165 | "σὸς", 166 | "σὺ", 167 | "σὺν", 168 | "τά", 169 | "τήν", 170 | "τί", 171 | "τίς", 172 | "τίσ", 173 | "τα", 174 | "ταῖς", 175 | "τε", 176 | "την", 177 | "τησ", 178 | "τι", 179 | "τινα", 180 | "τις", 181 | "τισ", 182 | "το", 183 | "τοί", 184 | "τοι", 185 | "τοιοῦτος", 186 | "τοιοῦτοσ", 187 | "τον", 188 | "τοτε", 189 | "του", 190 | "τούσ", 191 | "τοὺς", 192 | "τοῖς", 193 | "τοῦ", 194 | "των", 195 | "τό", 196 | "τόν", 197 | "τότε", 198 | "τὰ", 199 | "τὰς", 200 | "τὴν", 201 | "τὸ", 202 | "τὸν", 203 | "τῆς", 204 | "τῆσ", 205 | "τῇ", 206 | "τῶν", 207 | "τῷ", 208 | "ωσ", 209 | "ἀλλ'", 210 | "ἀλλά", 211 | "ἀλλὰ", 212 | "ἀλλ’", 213 | "ἀπ", 214 | "ἀπό", 215 | "ἀπὸ", 216 | "ἀφ", 217 | "ἂν", 218 | "ἃ", 219 | "ἄλλος", 220 | "ἄλλοσ", 221 | "ἄν", 222 | "ἄρα", 223 | "ἅμα", 224 | "ἐάν", 225 | "ἐγώ", 226 | "ἐγὼ", 227 | "ἐκ", 228 | "ἐμόσ", 229 | "ἐμὸς", 230 | "ἐν", 231 | "ἐξ", 232 | "ἐπί", 233 | "ἐπεὶ", 234 | "ἐπὶ", 235 | "ἐστι", 236 | "ἐφ", 237 | "ἐὰν", 238 | "ἑαυτοῦ", 239 | "ἔτι", 240 | "ἡ", 241 | "ἢ", 242 | "ἣ", 243 | "ἤ", 244 | "ἥ", 245 | "ἧς", 246 | "ἵνα", 247 | "ὁ", 248 | "ὃ", 249 | "ὃν", 250 | "ὃς", 251 | "ὅ", 252 | "ὅδε", 253 | "ὅθεν", 254 | "ὅπερ", 255 | "ὅς", 256 | "ὅσ", 257 | "ὅστις", 258 | "ὅστισ", 259 | "ὅτε", 260 | "ὅτι", 261 | "ὑμόσ", 262 | "ὑπ", 263 | "ὑπέρ", 264 | "ὑπό", 265 | "ὑπὲρ", 266 | "ὑπὸ", 267 | "ὡς", 268 | "ὡσ", 269 | "ὥς", 270 | "ὥστε", 271 | "ὦ", 272 | "ᾧ", 273 | ]; 274 | -------------------------------------------------------------------------------- /src/stopwords/epo.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search 
backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_EPO: &[&str] = &[ 8 | "adiaŭ", 9 | "ajn", 10 | "al", 11 | "ankoraŭ", 12 | "antaŭ", 13 | "aŭ", 14 | "bonan", 15 | "bonvole", 16 | "bonvolu", 17 | "bv", 18 | "ci", 19 | "cia", 20 | "cian", 21 | "cin", 22 | "d-ro", 23 | "da", 24 | "de", 25 | "dek", 26 | "deka", 27 | "do", 28 | "doktor'", 29 | "doktoro", 30 | "du", 31 | "dua", 32 | "dum", 33 | "eble", 34 | "ekz", 35 | "ekzemple", 36 | "en", 37 | "estas", 38 | "estis", 39 | "estos", 40 | "estu", 41 | "estus", 42 | "eĉ", 43 | "f-no", 44 | "feliĉan", 45 | "for", 46 | "fraŭlino", 47 | "ha", 48 | "havas", 49 | "havis", 50 | "havos", 51 | "havu", 52 | "havus", 53 | "he", 54 | "ho", 55 | "hu", 56 | "ili", 57 | "ilia", 58 | "ilian", 59 | "ilin", 60 | "inter", 61 | "io", 62 | "ion", 63 | "iu", 64 | "iujn", 65 | "iun", 66 | "ja", 67 | "jam", 68 | "je", 69 | "jes", 70 | "k", 71 | "kaj", 72 | "ke", 73 | "kio", 74 | "kion", 75 | "kiu", 76 | "kiujn", 77 | "kiun", 78 | "kvankam", 79 | "kvar", 80 | "kvara", 81 | "kvazaŭ", 82 | "kvin", 83 | "kvina", 84 | "la", 85 | "li", 86 | "lia", 87 | "lian", 88 | "lin", 89 | "malantaŭ", 90 | "male", 91 | "malgraŭ", 92 | "mem", 93 | "mi", 94 | "mia", 95 | "mian", 96 | "min", 97 | "minus", 98 | "naŭ", 99 | "naŭa", 100 | "ne", 101 | "nek", 102 | "nenio", 103 | "nenion", 104 | "neniu", 105 | "neniun", 106 | "nepre", 107 | "ni", 108 | "nia", 109 | "nian", 110 | "nin", 111 | "nu", 112 | "nun", 113 | "nur", 114 | "ok", 115 | "oka", 116 | "oni", 117 | "onia", 118 | "onian", 119 | "onin", 120 | "plej", 121 | "pli", 122 | "plu", 123 | "plus", 124 | "por", 125 | "post", 126 | "preter", 127 | "s-no", 128 | "s-ro", 129 | "se", 130 | "sed", 131 | "sep", 132 | "sepa", 133 | "ses", 134 | "sesa", 135 | "si", 136 | "sia", 137 | "sian", 138 | "sin", 139 | "sinjor'", 140 | "sinjorino", 141 | "sinjoro", 142 | "sub", 143 | "super", 144 | "supren", 145 | "sur", 146 | "tamen", 147 | "tio", 
148 | "tion", 149 | "tiu", 150 | "tiujn", 151 | "tiun", 152 | "tra", 153 | "tri", 154 | "tria", 155 | "tuj", 156 | "tute", 157 | "unu", 158 | "unua", 159 | "ve", 160 | "verŝajne", 161 | "vi", 162 | "via", 163 | "vian", 164 | "vin", 165 | "ĉi", 166 | "ĉio", 167 | "ĉion", 168 | "ĉiu", 169 | "ĉiujn", 170 | "ĉiun", 171 | "ĉu", 172 | "ĝi", 173 | "ĝia", 174 | "ĝian", 175 | "ĝin", 176 | "ĝis", 177 | "ĵus", 178 | "ŝi", 179 | "ŝia", 180 | "ŝin", 181 | ]; 182 | -------------------------------------------------------------------------------- /src/stopwords/est.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_EST: &[&str] = &[ 8 | "aga", "ei", "et", "ja", "jah", "kas", "kui", "kõik", "ma", "me", "mida", "midagi", "mind", 9 | "minu", "mis", "mu", "mul", "mulle", "nad", "nii", "oled", "olen", "oli", "oma", "on", "pole", 10 | "sa", "seda", "see", "selle", "siin", "siis", "ta", "te", "ära", 11 | ]; 12 | -------------------------------------------------------------------------------- /src/stopwords/guj.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_GUJ: &[&str] = &[ 8 | "અંગે", 9 | "અંદર", 10 | "અથવા", 11 | "અને", 12 | "અમને", 13 | "અમારું", 14 | "અમે", 15 | "અહીં", 16 | "આ", 17 | "આગળ", 18 | "આથી", 19 | "આનું", 20 | "આને", 21 | "આપણને", 22 | "આપણું", 23 | "આપણે", 24 | "આપી", 25 | "આર", 26 | "આવી", 27 | "આવે", 28 | "ઉપર", 29 | "ઉભા", 30 | "ઊંચે", 31 | "ઊભું", 32 | "એ", 33 | "એક", 34 | "એન", 35 | "એના", 36 | "એનાં", 37 | "એની", 38 | "એનું", 39 | "એને", 40 | "એનો", 41 | "એમ", 42 | "એવા", 43 | "એવાં", 44 | "એવી", 45 | 
"એવું", 46 | "એવો", 47 | "ઓછું", 48 | "કંઈક", 49 | "કઈ", 50 | "કયું", 51 | "કયો", 52 | "કરતાં", 53 | "કરવું", 54 | "કરી", 55 | "કરીએ", 56 | "કરું", 57 | "કરે", 58 | "કરેલું", 59 | "કર્યા", 60 | "કર્યાં", 61 | "કર્યું", 62 | "કર્યો", 63 | "કાંઈ", 64 | "કે", 65 | "કેટલું", 66 | "કેમ", 67 | "કેવી", 68 | "કેવું", 69 | "કોઈ", 70 | "કોઈક", 71 | "કોણ", 72 | "કોણે", 73 | "કોને", 74 | "ક્યાં", 75 | "ક્યારે", 76 | "ખૂબ", 77 | "ગઈ", 78 | "ગયા", 79 | "ગયાં", 80 | "ગયું", 81 | "ગયો", 82 | "ઘણું", 83 | "છ", 84 | "છતાં", 85 | "છીએ", 86 | "છું", 87 | "છે", 88 | "છેક", 89 | "છો", 90 | "જ", 91 | "જાય", 92 | "જી", 93 | "જે", 94 | "જેટલું", 95 | "જેને", 96 | "જેમ", 97 | "જેવી", 98 | "જેવું", 99 | "જેવો", 100 | "જો", 101 | "જોઈએ", 102 | "જ્યાં", 103 | "જ્યારે", 104 | "ઝાઝું", 105 | "તને", 106 | "તમને", 107 | "તમારું", 108 | "તમે", 109 | "તા", 110 | "તારાથી", 111 | "તારામાં", 112 | "તારું", 113 | "તું", 114 | "તે", 115 | "તેં", 116 | "તેઓ", 117 | "તેણે", 118 | "તેથી", 119 | "તેના", 120 | "તેની", 121 | "તેનું", 122 | "તેને", 123 | "તેમ", 124 | "તેમનું", 125 | "તેમને", 126 | "તેવી", 127 | "તેવું", 128 | "તો", 129 | "ત્યાં", 130 | "ત્યારે", 131 | "થઇ", 132 | "થઈ", 133 | "થઈએ", 134 | "થતા", 135 | "થતાં", 136 | "થતી", 137 | "થતું", 138 | "થતો", 139 | "થયા", 140 | "થયાં", 141 | "થયું", 142 | "થયેલું", 143 | "થયો", 144 | "થવું", 145 | "થાઉં", 146 | "થાઓ", 147 | "થાય", 148 | "થી", 149 | "થોડું", 150 | "દરેક", 151 | "ન", 152 | "નં", 153 | "નં.", 154 | "નથી", 155 | "નહિ", 156 | "નહી", 157 | "નહીં", 158 | "ના", 159 | "ની", 160 | "નીચે", 161 | "નું", 162 | "ને", 163 | "નો", 164 | "પછી", 165 | "પણ", 166 | "પર", 167 | "પરંતુ", 168 | "પહેલાં", 169 | "પાછળ", 170 | "પાસે", 171 | "પોતાનું", 172 | "પ્રત્યેક", 173 | "ફક્ત", 174 | "ફરી", 175 | "ફરીથી", 176 | "બંને", 177 | "બધા", 178 | "બધું", 179 | "બની", 180 | "બહાર", 181 | "બહુ", 182 | "બાદ", 183 | "બે", 184 | "મને", 185 | "મા", 186 | "માં", 187 | "માટે", 188 | "માત્ર", 189 | "મારું", 190 | "મી", 191 | "મૂકવું", 192 | "મૂકી", 193 | "મૂક્યા", 194 | 
"મૂક્યાં", 195 | "મૂક્યું", 196 | "મેં", 197 | "રહી", 198 | "રહે", 199 | "રહેવું", 200 | "રહ્યા", 201 | "રહ્યાં", 202 | "રહ્યો", 203 | "રીતે", 204 | "રૂ.", 205 | "રૂા", 206 | "લેતા", 207 | "લેતું", 208 | "લેવા", 209 | "વગેરે", 210 | "વધુ", 211 | "શકે", 212 | "શા", 213 | "શું", 214 | "સરખું", 215 | "સામે", 216 | "સુધી", 217 | "હતા", 218 | "હતાં", 219 | "હતી", 220 | "હતું", 221 | "હવે", 222 | "હશે", 223 | "હશો", 224 | "હા", 225 | "હું", 226 | "હો", 227 | "હોઈ", 228 | "હોઈશ", 229 | "હોઈશું", 230 | "હોય", 231 | "હોવા", 232 | ]; 233 | -------------------------------------------------------------------------------- /src/stopwords/heb.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_HEB: &[&str] = &[ 8 | "אבל", 9 | "או", 10 | "אולי", 11 | "אותה", 12 | "אותו", 13 | "אותי", 14 | "אותך", 15 | "אותם", 16 | "אותן", 17 | "אותנו", 18 | "אז", 19 | "אחר", 20 | "אחרות", 21 | "אחרי", 22 | "אחריכן", 23 | "אחרים", 24 | "אחרת", 25 | "אי", 26 | "איזה", 27 | "איך", 28 | "אין", 29 | "איפה", 30 | "איתה", 31 | "איתו", 32 | "איתי", 33 | "איתך", 34 | "איתכם", 35 | "איתכן", 36 | "איתם", 37 | "איתן", 38 | "איתנו", 39 | "אך", 40 | "אל", 41 | "אלה", 42 | "אלו", 43 | "אם", 44 | "אנחנו", 45 | "אני", 46 | "אס", 47 | "אף", 48 | "אצל", 49 | "אשר", 50 | "את", 51 | "אתה", 52 | "אתכם", 53 | "אתכן", 54 | "אתם", 55 | "אתן", 56 | "באיזומידה", 57 | "באמצע", 58 | "באמצעות", 59 | "בגלל", 60 | "בין", 61 | "בלי", 62 | "במידה", 63 | "במקוםשבו", 64 | "ברם", 65 | "בשביל", 66 | "בשעהש", 67 | "בתוך", 68 | "גם", 69 | "דרך", 70 | "הוא", 71 | "היא", 72 | "היה", 73 | "היכן", 74 | "היתה", 75 | "היתי", 76 | "הם", 77 | "הן", 78 | "הנה", 79 | "הסיבהשבגללה", 80 | "הרי", 81 | "ואילו", 82 | "ואת", 83 | "זאת", 84 | "זה", 85 | "זות", 86 | "יהיה", 87 | "יוכל", 88 | "יוכלו", 89 | "יותרמדי", 90 | 
"יכול", 91 | "יכולה", 92 | "יכולות", 93 | "יכולים", 94 | "יכל", 95 | "יכלה", 96 | "יכלו", 97 | "יש", 98 | "כאן", 99 | "כאשר", 100 | "כולם", 101 | "כולן", 102 | "כזה", 103 | "כי", 104 | "כיצד", 105 | "כך", 106 | "ככה", 107 | "כל", 108 | "כלל", 109 | "כמו", 110 | "כן", 111 | "כפי", 112 | "כש", 113 | "לא", 114 | "לאו", 115 | "לאיזותכלית", 116 | "לאן", 117 | "לבין", 118 | "לה", 119 | "להיות", 120 | "להם", 121 | "להן", 122 | "לו", 123 | "לי", 124 | "לכם", 125 | "לכן", 126 | "למה", 127 | "למטה", 128 | "למעלה", 129 | "למקוםשבו", 130 | "למרות", 131 | "לנו", 132 | "לעבר", 133 | "לעיכן", 134 | "לפיכך", 135 | "לפני", 136 | "מאד", 137 | "מאחורי", 138 | "מאיזוסיבה", 139 | "מאין", 140 | "מאיפה", 141 | "מבלי", 142 | "מבעד", 143 | "מדוע", 144 | "מה", 145 | "מהיכן", 146 | "מול", 147 | "מחוץ", 148 | "מי", 149 | "מכאן", 150 | "מכיוון", 151 | "מלבד", 152 | "מן", 153 | "מנין", 154 | "מסוגל", 155 | "מעט", 156 | "מעטים", 157 | "מעל", 158 | "מצד", 159 | "מקוםבו", 160 | "מתחת", 161 | "מתי", 162 | "נגד", 163 | "נגר", 164 | "נו", 165 | "עד", 166 | "עז", 167 | "על", 168 | "עלי", 169 | "עליה", 170 | "עליהם", 171 | "עליהן", 172 | "עליו", 173 | "עליך", 174 | "עליכם", 175 | "עלינו", 176 | "עם", 177 | "עצמה", 178 | "עצמהם", 179 | "עצמהן", 180 | "עצמו", 181 | "עצמי", 182 | "עצמם", 183 | "עצמן", 184 | "עצמנו", 185 | "פה", 186 | "רק", 187 | "שוב", 188 | "של", 189 | "שלה", 190 | "שלהם", 191 | "שלהן", 192 | "שלו", 193 | "שלי", 194 | "שלך", 195 | "שלכה", 196 | "שלכם", 197 | "שלכן", 198 | "שלנו", 199 | "שם", 200 | "תהיה", 201 | "תחת", 202 | ]; 203 | -------------------------------------------------------------------------------- /src/stopwords/hin.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_HIN: &[&str] = &[ 8 | "अंदर", 9 | "अत", 10 | "अदि", 11 | "अप", 12 | "अपना", 13 
| "अपनि", 14 | "अपनी", 15 | "अपने", 16 | "अभि", 17 | "अभी", 18 | "आदि", 19 | "आप", 20 | "इंहिं", 21 | "इंहें", 22 | "इंहों", 23 | "इतयादि", 24 | "इत्यादि", 25 | "इन", 26 | "इनका", 27 | "इन्हीं", 28 | "इन्हें", 29 | "इन्हों", 30 | "इस", 31 | "इसका", 32 | "इसकि", 33 | "इसकी", 34 | "इसके", 35 | "इसमें", 36 | "इसि", 37 | "इसी", 38 | "इसे", 39 | "उंहिं", 40 | "उंहें", 41 | "उंहों", 42 | "उन", 43 | "उनका", 44 | "उनकि", 45 | "उनकी", 46 | "उनके", 47 | "उनको", 48 | "उन्हीं", 49 | "उन्हें", 50 | "उन्हों", 51 | "उस", 52 | "उसके", 53 | "उसि", 54 | "उसी", 55 | "उसे", 56 | "एक", 57 | "एवं", 58 | "एस", 59 | "एसे", 60 | "ऐसे", 61 | "ओर", 62 | "और", 63 | "कइ", 64 | "कई", 65 | "कर", 66 | "करता", 67 | "करते", 68 | "करना", 69 | "करने", 70 | "करें", 71 | "कहते", 72 | "कहा", 73 | "का", 74 | "काफि", 75 | "काफ़ी", 76 | "कि", 77 | "किंहें", 78 | "किंहों", 79 | "कितना", 80 | "किन्हें", 81 | "किन्हों", 82 | "किया", 83 | "किर", 84 | "किस", 85 | "किसि", 86 | "किसी", 87 | "किसे", 88 | "की", 89 | "कुछ", 90 | "कुल", 91 | "के", 92 | "को", 93 | "कोइ", 94 | "कोई", 95 | "कोन", 96 | "कोनसा", 97 | "कौन", 98 | "कौनसा", 99 | "गया", 100 | "घर", 101 | "जब", 102 | "जहाँ", 103 | "जहां", 104 | "जा", 105 | "जिंहें", 106 | "जिंहों", 107 | "जितना", 108 | "जिधर", 109 | "जिन", 110 | "जिन्हें", 111 | "जिन्हों", 112 | "जिस", 113 | "जिसे", 114 | "जीधर", 115 | "जेसा", 116 | "जेसे", 117 | "जैसा", 118 | "जैसे", 119 | "जो", 120 | "तक", 121 | "तब", 122 | "तरह", 123 | "तिंहें", 124 | "तिंहों", 125 | "तिन", 126 | "तिन्हें", 127 | "तिन्हों", 128 | "तिस", 129 | "तिसे", 130 | "तो", 131 | "था", 132 | "थि", 133 | "थी", 134 | "थे", 135 | "दबारा", 136 | "दवारा", 137 | "दिया", 138 | "दुसरा", 139 | "दुसरे", 140 | "दूसरे", 141 | "दो", 142 | "द्वारा", 143 | "न", 144 | "नहिं", 145 | "नहीं", 146 | "ना", 147 | "निचे", 148 | "निहायत", 149 | "नीचे", 150 | "ने", 151 | "पर", 152 | "पहले", 153 | "पुरा", 154 | "पूरा", 155 | "पे", 156 | "फिर", 157 | "बनि", 158 | "बनी", 159 | "बहि", 160 | "बही", 161 | "बहुत", 162 | "बाद", 163 | "बाला", 164 | 
"बिलकुल", 165 | "भि", 166 | "भितर", 167 | "भी", 168 | "भीतर", 169 | "मगर", 170 | "मानो", 171 | "मे", 172 | "में", 173 | "यदि", 174 | "यह", 175 | "यहाँ", 176 | "यहां", 177 | "यहि", 178 | "यही", 179 | "या", 180 | "यिह", 181 | "ये", 182 | "रखें", 183 | "रवासा", 184 | "रहा", 185 | "रहे", 186 | "ऱ्वासा", 187 | "लिए", 188 | "लिये", 189 | "लेकिन", 190 | "व", 191 | "वगेरह", 192 | "वरग", 193 | "वर्ग", 194 | "वह", 195 | "वहाँ", 196 | "वहां", 197 | "वहिं", 198 | "वहीं", 199 | "वाले", 200 | "वुह", 201 | "वे", 202 | "वग़ैरह", 203 | "संग", 204 | "सकता", 205 | "सकते", 206 | "सबसे", 207 | "सभि", 208 | "सभी", 209 | "साथ", 210 | "साबुत", 211 | "साभ", 212 | "सारा", 213 | "से", 214 | "सो", 215 | "हि", 216 | "ही", 217 | "हुअ", 218 | "हुआ", 219 | "हुइ", 220 | "हुई", 221 | "हुए", 222 | "हे", 223 | "हें", 224 | "है", 225 | "हैं", 226 | "हो", 227 | "होता", 228 | "होति", 229 | "होती", 230 | "होते", 231 | "होना", 232 | "होने", 233 | ]; 234 | -------------------------------------------------------------------------------- /src/stopwords/hrv.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_HRV: &[&str] = &[ 8 | "a", "ako", "ali", "bi", "bih", "bila", "bili", "bilo", "bio", "bismo", "biste", "biti", 9 | "bumo", "da", "do", "duž", "ga", "hoće", "hoćemo", "hoćete", "hoćeš", "hoću", "i", "iako", 10 | "ih", "ili", "iz", "ja", "je", "jedna", "jedne", "jedno", "jer", "jesam", "jesi", "jesmo", 11 | "jest", "jeste", "jesu", "jim", "joj", "još", "ju", "kada", "kako", "kao", "koja", "koje", 12 | "koji", "kojima", "koju", "kroz", "li", "me", "mene", "meni", "mi", "mimo", "moj", "moja", 13 | "moje", "mu", "na", "nad", "nakon", "nam", "nama", "nas", "naš", "naša", "naše", "našeg", "ne", 14 | "nego", "neka", "neki", "nekog", "neku", "nema", "netko", "neće", "nećemo", "nećete", 
"nećeš", 15 | "neću", "nešto", "ni", "nije", "nikoga", "nikoje", "nikoju", "nisam", "nisi", "nismo", "niste", 16 | "nisu", "njega", "njegov", "njegova", "njegovo", "njemu", "njezin", "njezina", "njezino", 17 | "njih", "njihov", "njihova", "njihovo", "njim", "njima", "njoj", "nju", "no", "o", "od", 18 | "odmah", "on", "ona", "oni", "ono", "ova", "pa", "pak", "po", "pod", "pored", "prije", "s", 19 | "sa", "sam", "samo", "se", "sebe", "sebi", "si", "smo", "ste", "su", "sve", "svi", "svog", 20 | "svoj", "svoja", "svoje", "svom", "ta", "tada", "taj", "tako", "te", "tebe", "tebi", "ti", 21 | "to", "toj", "tome", "tu", "tvoj", "tvoja", "tvoje", "u", "uz", "vam", "vama", "vas", "vaš", 22 | "vaša", "vaše", "već", "vi", "vrlo", "za", "zar", "će", "ćemo", "ćete", "ćeš", "ću", "što", 23 | ]; 24 | -------------------------------------------------------------------------------- /src/stopwords/hye.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2022, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_HYE: &[&str] = &[ 8 | "այդ", 9 | "այլ", 10 | "այն", 11 | "այս", 12 | "դու", 13 | "դուք", 14 | "եմ", 15 | "են", 16 | "ենք", 17 | "ես", 18 | "եք", 19 | "է", 20 | "էի", 21 | "էին", 22 | "էինք", 23 | "էիր", 24 | "էիք", 25 | "էր", 26 | "ըստ", 27 | "թ", 28 | "ի", 29 | "ին", 30 | "իսկ", 31 | "իր", 32 | "կամ", 33 | "համար", 34 | "հետ", 35 | "հետո", 36 | "մենք", 37 | "մեջ", 38 | "մի", 39 | "ն", 40 | "նա", 41 | "նաև", 42 | "նրա", 43 | "նրանք", 44 | "որ", 45 | "որը", 46 | "որոնք", 47 | "որպես", 48 | "ու", 49 | "ում", 50 | "պիտի", 51 | "վրա", 52 | "և", 53 | ]; 54 | -------------------------------------------------------------------------------- /src/stopwords/jav.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search 
backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | // Notice: we do not have stopwords for this language yet. 8 | pub static STOPWORDS_JAV: &[&str] = &[]; 9 | -------------------------------------------------------------------------------- /src/stopwords/jpn.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_JPN: &[&str] = &[ 8 | "あそこ", 9 | "あっ", 10 | "あの", 11 | "あのかた", 12 | "あの人", 13 | "あり", 14 | "あります", 15 | "ある", 16 | "あれ", 17 | "い", 18 | "いう", 19 | "います", 20 | "いる", 21 | "う", 22 | "うち", 23 | "え", 24 | "お", 25 | "および", 26 | "おり", 27 | "おります", 28 | "か", 29 | "かつて", 30 | "から", 31 | "が", 32 | "き", 33 | "ここ", 34 | "こちら", 35 | "こと", 36 | "この", 37 | "これ", 38 | "これら", 39 | "さ", 40 | "さらに", 41 | "し", 42 | "しかし", 43 | "する", 44 | "ず", 45 | "せ", 46 | "せる", 47 | "そこ", 48 | "そして", 49 | "その", 50 | "その他", 51 | "その後", 52 | "それ", 53 | "それぞれ", 54 | "それで", 55 | "た", 56 | "ただし", 57 | "たち", 58 | "ため", 59 | "たり", 60 | "だ", 61 | "だっ", 62 | "だれ", 63 | "つ", 64 | "て", 65 | "で", 66 | "でき", 67 | "できる", 68 | "です", 69 | "では", 70 | "でも", 71 | "と", 72 | "という", 73 | "といった", 74 | "とき", 75 | "ところ", 76 | "として", 77 | "とともに", 78 | "とも", 79 | "と共に", 80 | "どこ", 81 | "どの", 82 | "な", 83 | "ない", 84 | "なお", 85 | "なかっ", 86 | "ながら", 87 | "なく", 88 | "なっ", 89 | "など", 90 | "なに", 91 | "なら", 92 | "なり", 93 | "なる", 94 | "なん", 95 | "に", 96 | "において", 97 | "における", 98 | "について", 99 | "にて", 100 | "によって", 101 | "により", 102 | "による", 103 | "に対して", 104 | "に対する", 105 | "に関する", 106 | "の", 107 | "ので", 108 | "のみ", 109 | "は", 110 | "ば", 111 | "へ", 112 | "ほか", 113 | "ほとんど", 114 | "ほど", 115 | "ます", 116 | "また", 117 | "または", 118 | "まで", 119 | "も", 120 | "もの", 121 | "ものの", 122 | "や", 123 | "よう", 124 | "より", 125 | "ら", 126 | "られ", 127 | "られる", 128 
| "れ", 129 | "れる", 130 | "を", 131 | "ん", 132 | "何", 133 | "及び", 134 | "彼", 135 | "彼女", 136 | "我々", 137 | "特に", 138 | "私", 139 | "私達", 140 | "貴方", 141 | "貴方方", 142 | ]; 143 | -------------------------------------------------------------------------------- /src/stopwords/kan.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | // Notice: stopwords for this language (Kannada) are listed below. 8 | pub static STOPWORDS_KAN: &[&str] = &[ 9 | "ಆ", 10 | "ಈ", 11 | "ಅಥವಾ", 12 | "ಮತ್ತು", 13 | "ಆದರೆ", 14 | "ಎಂದು", 15 | "ಅವರ", 16 | "ಎಂಬ", 17 | "ಅವರು", 18 | "ಬಗ್ಗೆ", 19 | "ಇದೆ", 20 | "ಇದು", 21 | "ಮೂಲಕ", 22 | "ಅದು", 23 | "ಮೇಲೆ", 24 | "ಈಗ", 25 | "ಹಾಗೂ", 26 | "ಹೆಚ್ಚು", 27 | "ಅವರಿಗೆ", 28 | "ತಮ್ಮ", 29 | "ಮಾಡಿ", 30 | "ನಮ್ಮ", 31 | "ಮಾತ್ರ", 32 | "ದೊಡ್ಡ", 33 | "ಅದೇ", 34 | "ಕೂಡ", 35 | "ಯಾವುದೇ", 36 | "ಯಾವ", 37 | "ಆಗ", 38 | "ತುಂಬಾ", 39 | "ನಾವು", 40 | "ದಿನ", 41 | "ಬೇರೆ", 42 | "ಅವರನ್ನು", 43 | "ಎಲ್ಲಾ", 44 | "ನೀವು", 45 | "ಸಾಕಷ್ಟು", 46 | "ಕನ್ನಡ", 47 | "ಹೊಸ", 48 | "ಮುಂದೆ", 49 | "ಹೇಗೆ", 50 | "ನಂತರ", 51 | "ಇಲ್ಲಿ", 52 | "ಕೆಲಸ", 53 | "ಬಳಿಕ", 54 | "ಒಳ್ಳೆಯ", 55 | "ಹಾಗಾಗಿ", 56 | "ಜನ", 57 | "ಅದನ್ನು", 58 | "ಬಂದ", 59 | "ಕಾರಣ", 60 | "ಅವಕಾಶ", 61 | "ವರ್ಷ", 62 | "ನಿಮ್ಮ", 63 | "ಇತ್ತು", 64 | "ಹೇಳಿ", 65 | "ಮಾಡಿದ", 66 | "ಅದಕ್ಕೆ", 67 | "ಆಗಿ", 68 | "ಎಂಬುದು", 69 | "ಅಂತ", 70 | "ಕೆಲವು", 71 | "ಮೊದಲು", 72 | "ಬಂದು", 73 | "ಇದೇ", 74 | "ನೋಡಿ", 75 | "ಕೇವಲ", 76 | "ಎರಡು", 77 | "ಇನ್ನು", 78 | "ಅಷ್ಟೇ", 79 | "ಎಷ್ಟು", 80 | "ಮಾಡಬೇಕು", 81 | "ಹೀಗೆ", 82 | "ಕುರಿತು", 83 | "ಎಂದರೆ", 84 | "ಇನ್ನೂ", 85 | "ಮತ್ತೆ", 86 | "ಏನು", 87 | "ಮುಂದಿನ", 88 | "ಮಾಡುವ", 89 | "ವೇಳೆ", 90 | "ಜೊತೆಗೆ", 91 | ]; 92 | -------------------------------------------------------------------------------- /src/stopwords/khm.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search
backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_KHM: &[&str] = &[ 8 | "ៗ", 9 | "។ល។", 10 | "៚", 11 | "។", 12 | "៕", 13 | "៖", 14 | "៙", 15 | "០", 16 | "១", 17 | "២", 18 | "៣", 19 | "៤", 20 | "៥", 21 | "៦", 22 | "៧", 23 | "៨", 24 | "៩", 25 | "៛", 26 | "នេះ", 27 | "នោះ", 28 | "ខ្ញុំ", 29 | "អ្នក", 30 | "គាត់", 31 | "នាង", 32 | "ពួក", 33 | "យើង", 34 | "ពួកគេ", 35 | "លោក", 36 | "អ្វី", 37 | "បាន", 38 | "ការ", 39 | "នៅ", 40 | "និង", 41 | "ដែល", 42 | "មាន", 43 | "ជា", 44 | "ថា", 45 | "ក្នុង", 46 | "របស់", 47 | "ពី", 48 | "មួយ", 49 | "នឹង", 50 | "ឲ្យ", 51 | "មិន", 52 | "ទៅ", 53 | "តែ", 54 | "ត្រូវ", 55 | "ដោយ", 56 | "ហើយ", 57 | "ឆ្នាំ", 58 | "ពេល", 59 | "គេ", 60 | "ប្រទេស", 61 | "អាច", 62 | "គឺ", 63 | "ក្រុម", 64 | "ធ្វើ", 65 | "ក៏", 66 | "លើ", 67 | "នៃ", 68 | "ដើម្បី", 69 | "មក", 70 | "ទី", 71 | "តាម", 72 | "ទេ", 73 | "ដល់", 74 | "វា", 75 | "ដែរ", 76 | "ខ្លួន", 77 | "សម្រាប់", 78 | "ក្រុមហ៊ុន", 79 | "ថ្ងៃ", 80 | "ចំនួន", 81 | "កម្ពុជា", 82 | "ឡើង", 83 | "ទៀត", 84 | "ទាំង", 85 | "បើ", 86 | "និយាយ", 87 | "ទទួល", 88 | "ដ៏", 89 | "ច្រើន", 90 | "ផង", 91 | "ដឹង", 92 | "ជាមួយ", 93 | "គ្នា", 94 | "ខែ", 95 | "នាក់", 96 | "កំពុង", 97 | "យ៉ាង", 98 | "តម្លៃ", 99 | "ប្រកួត", 100 | "ក្រុង", 101 | "តំបន់", 102 | "ភាព", 103 | "យក", 104 | "ជាង", 105 | "ចូល", 106 | "នូវ", 107 | "កាលពី", 108 | "ណា", 109 | "បន្ត", 110 | "ជាតិ", 111 | "រូប", 112 | "មនុស្ស", 113 | "កាល", 114 | "ចំពោះ", 115 | "ដូច", 116 | "ខណៈ", 117 | "វិញ", 118 | "មុន", 119 | "ភ្នំពេញ", 120 | "លើក", 121 | "ល្អ", 122 | "ខាង", 123 | "ដុល្លារ", 124 | "ឃើញ", 125 | "បញ្ហា", 126 | "ប្រើ", 127 | "ចាប់", 128 | "ទឹក", 129 | "តើ", 130 | "ប្រាក់", 131 | "ធំ", 132 | "ខ្មែរ", 133 | "ចេញ", 134 | "ខេត្ត", 135 | "ផ្នែក", 136 | "ថ្មី", 137 | "បង្ហាញ", 138 | "ស៊ី", 139 | "អាមេរិក", 140 | "គឺជា", 141 | "លក់", 142 | "ចង់", 143 | "ដាក់", 144 | "ម្នាក់", 145 | "រួម", 146 | "រថយន្ត", 147 | "ផ្លូវ", 148 | "ភាគរយ", 149 | "កើន", 150 | "ជួយ", 151 | "ពីរ", 152 | "លាន", 
153 | "ផ្តល់", 154 | "រដ្ឋ", 155 | "ខ្លាំង", 156 | "ជាច្រើន", 157 | "ទីក្រុង", 158 | "ជន", 159 | "កីឡា", 160 | "ក្រោយ", 161 | "ប្រាប់", 162 | "រដ្ឋាភិបាល", 163 | "កាន់", 164 | "ការងារ", 165 | "រក", 166 | "ព្រោះ", 167 | "រឿង", 168 | "ប៉ុន្តែ", 169 | "ឡើយ", 170 | "មុខ", 171 | "ថ្លែង", 172 | "ធ្វើឲ្យ", 173 | "បី", 174 | "នាំ", 175 | "ច្បាប់", 176 | "ដី", 177 | "ដូចជា", 178 | "កម", 179 | "ផ្ទះ", 180 | "បញ្ជាក់", 181 | "ចុះ", 182 | "បំផុត", 183 | "ចិត្ត", 184 | "បែប", 185 | "ចិន", 186 | "កីឡាករ", 187 | "កញ្ញា", 188 | "គម្រោង", 189 | "បង្កើត", 190 | "នា", 191 | "សារ", 192 | "សេដ្ឋកិច្ច", 193 | "ធនាគារ", 194 | "អស់", 195 | "ភាគ", 196 | "កូន", 197 | "ប្រធាន", 198 | "ផ្សារ", 199 | "ខ្ពស់", 200 | "គ្មាន", 201 | "ណាស់", 202 | "សម្រេច", 203 | "គួរ", 204 | "គ្រប់", 205 | "ប្រជាជន", 206 | "បន្ថែម", 207 | "រយៈ", 208 | "ខ្លះ", 209 | "បទ", 210 | "ទិញ", 211 | "ទើប", 212 | "វិនិយោគ", 213 | "មានការ", 214 | "លេខ", 215 | "ថៃ", 216 | "មើល", 217 | "បុរស", 218 | "យុវជន", 219 | "ស្រី", 220 | "នយោបាយ", 221 | "កន្លែង", 222 | "គិត", 223 | "បើក", 224 | "ដូច្នេះ", 225 | "រូបថត", 226 | "វាយ", 227 | "ប្រភេទ", 228 | "សំខាន់", 229 | "បន្ទាប់ពី", 230 | "កម្មវិធី", 231 | "រយៈពេល", 232 | "ផលិត", 233 | "ឈ្នះ", 234 | "ពិភពលោក", 235 | "ភ្ញៀវ", 236 | "ដោយសារ", 237 | "ស្រុក", 238 | "អាយុ", 239 | "ចំណាយ", 240 | "អំពី", 241 | "ហ៊ុន", 242 | "សិក្សា", 243 | ]; 244 | -------------------------------------------------------------------------------- /src/stopwords/lat.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | // Notice: stopwords for this language (Latin) are listed below.
8 | pub static STOPWORDS_LAT: &[&str] = &[ 9 | "a", "ab", "ac", "ad", "at", "atque", "aut", "autem", "cum", "de", "dum", "e", "erant", "erat", 10 | "est", "et", "etiam", "ex", "haec", "hic", "hoc", "in", "ita", "me", "nec", "neque", "non", 11 | "per", "qua", "quae", "quam", "qui", "quibus", "quidem", "quo", "quod", "re", "rebus", "rem", 12 | "res", "sed", "si", "sic", "sunt", "tamen", "tandem", "te", "ut", "vel", 13 | ]; 14 | -------------------------------------------------------------------------------- /src/stopwords/lav.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_LAV: &[&str] = &[ 8 | "aiz", 9 | "ap", 10 | "apakš", 11 | "apakšpus", 12 | "ar", 13 | "arī", 14 | "augšpus", 15 | "bet", 16 | "bez", 17 | "bija", 18 | "biji", 19 | "biju", 20 | "bijām", 21 | "bijāt", 22 | "būs", 23 | "būsi", 24 | "būsiet", 25 | "būsim", 26 | "būt", 27 | "būšu", 28 | "caur", 29 | "diemžēl", 30 | "diezin", 31 | "droši", 32 | "dēļ", 33 | "esam", 34 | "esat", 35 | "esi", 36 | "esmu", 37 | "gan", 38 | "gar", 39 | "iekam", 40 | "iekams", 41 | "iekām", 42 | "iekāms", 43 | "iekš", 44 | "iekšpus", 45 | "ik", 46 | "ir", 47 | "it", 48 | "itin", 49 | "iz", 50 | "ja", 51 | "jau", 52 | "jeb", 53 | "jebšu", 54 | "jel", 55 | "jo", 56 | "jā", 57 | "ka", 58 | "kamēr", 59 | "kaut", 60 | "kolīdz", 61 | "kopš", 62 | "kā", 63 | "kļuva", 64 | "kļuvi", 65 | "kļuvu", 66 | "kļuvām", 67 | "kļuvāt", 68 | "kļūs", 69 | "kļūsi", 70 | "kļūsiet", 71 | "kļūsim", 72 | "kļūst", 73 | "kļūstam", 74 | "kļūstat", 75 | "kļūsti", 76 | "kļūstu", 77 | "kļūt", 78 | "kļūšu", 79 | "labad", 80 | "lai", 81 | "lejpus", 82 | "līdz", 83 | "līdzko", 84 | "ne", 85 | "nebūt", 86 | "nedz", 87 | "nekā", 88 | "nevis", 89 | "nezin", 90 | "no", 91 | "nu", 92 | "nē", 93 | "otrpus", 94 | "pa", 95 | "par", 96 | 
"pat", 97 | "pie", 98 | "pirms", 99 | "pret", 100 | "priekš", 101 | "pār", 102 | "pēc", 103 | "starp", 104 | "tad", 105 | "tak", 106 | "tapi", 107 | "taps", 108 | "tapsi", 109 | "tapsiet", 110 | "tapsim", 111 | "tapt", 112 | "tapāt", 113 | "tapšu", 114 | "taču", 115 | "te", 116 | "tiec", 117 | "tiek", 118 | "tiekam", 119 | "tiekat", 120 | "tieku", 121 | "tik", 122 | "tika", 123 | "tikai", 124 | "tiki", 125 | "tikko", 126 | "tiklab", 127 | "tiklīdz", 128 | "tiks", 129 | "tiksiet", 130 | "tiksim", 131 | "tikt", 132 | "tiku", 133 | "tikvien", 134 | "tikām", 135 | "tikāt", 136 | "tikšu", 137 | "tomēr", 138 | "topat", 139 | "turpretim", 140 | "turpretī", 141 | "tā", 142 | "tādēļ", 143 | "tālab", 144 | "tāpēc", 145 | "un", 146 | "uz", 147 | "vai", 148 | "var", 149 | "varat", 150 | "varēja", 151 | "varēji", 152 | "varēju", 153 | "varējām", 154 | "varējāt", 155 | "varēs", 156 | "varēsi", 157 | "varēsiet", 158 | "varēsim", 159 | "varēt", 160 | "varēšu", 161 | "vien", 162 | "virs", 163 | "virspus", 164 | "vis", 165 | "viņpus", 166 | "zem", 167 | "ārpus", 168 | "šaipus", 169 | ]; 170 | -------------------------------------------------------------------------------- /src/stopwords/mal.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_MAL: &[&str] = &[ 8 | "കാണാന്‍", 9 | "നിന്ന്", 10 | "കുറഞ്ഞ", 11 | "മുഴുവന്", 12 | "കൂടാതെ", 13 | "ആദ്യം", 14 | "ഈ", 15 | "കൂടുതല്‍", 16 | "താങ്കള്‍", 17 | "എന്നാല്", 18 | "അതിനു", 19 | "ശേഷം", 20 | "ചെയ്യുന്നു", 21 | "ഇവിടത്തെ", 22 | "വേണ്ടി", 23 | "ഏറ്റവും", 24 | "ഇതില്", 25 | "വേണ്ടിയും", 26 | "ആണ്", 27 | "സ്ഥിതിചെയ്യുന്നു", 28 | "സ്ഥിതി", 29 | "സ്ഥിതിചെയ്യുന്ന", 30 | "ചെയ്യണം", 31 | "നമ്മുടെ", 32 | "ഇപ്പോള്", 33 | "ഒരു", 34 | "തന്റെ", 35 | "ചെയ്യുന്ന", 36 | "എന്ന", 37 | "ചെയ്യുന്നത്", 38 | "ഉണ്ട്", 39 | 
"മുന്‍പ്", 40 | "മുമ്പ്", 41 | "കൂടെ", 42 | "ചേര്‍ത്തു", 43 | "ഇപ്രകാരം", 44 | "എന്നിവയുടെ", 45 | "കഴിയും", 46 | "എന്നീ", 47 | "ഇതാണ്", 48 | "വളരെ", 49 | "കാരണം", 50 | "ഇവിടത്തെ", 51 | "എപ്പോഴും", 52 | "കൊണ്ട്", 53 | "നല്ല", 54 | "ധാരാളം", 55 | "എപ്പോഴും", 56 | "ഇവ", 57 | "കാരണം", 58 | "ഇതു", 59 | "മാത്രമല്ല", 60 | "മറ്റു", 61 | "എന്നിവ", 62 | "കൂടിയാണ്", 63 | "ഇടയില്", 64 | "ഇല്ല", 65 | "എന്നാണ്", 66 | "എന്നു", 67 | "കുറച്ച്", 68 | "അതായത്", 69 | "എന്തെന്നാല്", 70 | "എന്നറിയപ്പെടുന്നു", 71 | "കിടക്കുന്ന", 72 | "പോയാല്", 73 | "ഇത്", 74 | "എല്ലാ", 75 | "വേണ്ടി", 76 | "ഇവിടെ", 77 | "വരുന്നു", 78 | "പോലുള്ള", 79 | "വലിയ", 80 | "പറഞ്ഞ്", 81 | "ഇതിനെ", 82 | "കൊടുത്തിട്ടും", 83 | "എന്ന്", 84 | "വേണം", 85 | "ഒരുപോലെ", 86 | "ഒരു പോലെ", 87 | "കാര്യമാണ്", 88 | "കഴിയുന്നു", 89 | "വളരെ", 90 | "അധികം", 91 | "വളരെ അധികം", 92 | "വളരെയധികം", 93 | "പോയി", 94 | "ഉണ്ടാകുന്നുണ്ട്", 95 | "പക്ഷേ", 96 | "അതേ", 97 | "കൊണ്ട്", 98 | "ഏത്", 99 | "നിന്നും", 100 | "എത്താന്‍", 101 | "അടുത്ത്", 102 | "ആയി", 103 | "എന്നു പറയുന്നു", 104 | "ഇപ്പോൾ", 105 | "ഏകദേശം", 106 | "എന്നുപറയുന്നു", 107 | "കാണാൻ", 108 | "ആ", 109 | "വിവിധ", 110 | "ഇതിന്റെ", 111 | "നിന്നു", 112 | "ഇതിന്", 113 | "അടുത്ത", 114 | "അടുത്തുള്ള", 115 | "പല", 116 | "പ്രധാന", 117 | "നിലനിൽക്കുന്ന", 118 | "നിലനിൽക്കുന്നത്", 119 | "മുതലായവ", 120 | "മുതലായവക്ക്", 121 | "വേണ്ട", 122 | "പ്രാധാന്യം", 123 | ]; 124 | -------------------------------------------------------------------------------- /src/stopwords/mar.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_MAR: &[&str] = &[ 8 | "अधिक", 9 | "अनेक", 10 | "अशी", 11 | "असलयाचे", 12 | "असलेल्या", 13 | "असा", 14 | "असून", 15 | "असे", 16 | "आज", 17 | "आणि", 18 | "आता", 19 | "आपल्या", 20 | "आला", 21 | "आली", 22 | "आले", 23 | "आहे", 24 | "आहेत", 25 | "एक", 26 | 
"एका", 27 | "कमी", 28 | "करणयात", 29 | "करून", 30 | "का", 31 | "काम", 32 | "काय", 33 | "काही", 34 | "किवा", 35 | "की", 36 | "केला", 37 | "केली", 38 | "केले", 39 | "कोटी", 40 | "गेल्या", 41 | "घेऊन", 42 | "जात", 43 | "झाला", 44 | "झाली", 45 | "झाले", 46 | "झालेल्या", 47 | "टा", 48 | "डॉ", 49 | "तर", 50 | "तरी", 51 | "तसेच", 52 | "ता", 53 | "ती", 54 | "तीन", 55 | "ते", 56 | "तो", 57 | "त्या", 58 | "त्याचा", 59 | "त्याची", 60 | "त्याच्या", 61 | "त्याना", 62 | "त्यानी", 63 | "त्यामुळे", 64 | "त्री", 65 | "दिली", 66 | "दोन", 67 | "न", 68 | "नाही", 69 | "निर्ण्य", 70 | "पण", 71 | "पम", 72 | "परयतन", 73 | "पाटील", 74 | "म", 75 | "मात्र", 76 | "माहिती", 77 | "मी", 78 | "मुबी", 79 | "म्हणजे", 80 | "म्हणाले", 81 | "म्हणून", 82 | "या", 83 | "याचा", 84 | "याची", 85 | "याच्या", 86 | "याना", 87 | "यानी", 88 | "येणार", 89 | "येत", 90 | "येथील", 91 | "येथे", 92 | "लाख", 93 | "व", 94 | "व्यकत", 95 | "सर्व", 96 | "सागित्ले", 97 | "सुरू", 98 | "हजार", 99 | "हा", 100 | "ही", 101 | "हे", 102 | "होणार", 103 | "होत", 104 | "होता", 105 | "होती", 106 | "होते", 107 | ]; 108 | -------------------------------------------------------------------------------- /src/stopwords/mkd.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | // Notice: we do not have stopwords for this language yet. 
8 | pub static STOPWORDS_MKD: &[&str] = &[]; 9 | -------------------------------------------------------------------------------- /src/stopwords/mod.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | // All stopwords are sourced from: https://github.com/stopwords-iso 8 | // Last update: 7th March 2019 9 | 10 | pub mod afr; 11 | pub mod aka; 12 | pub mod amh; 13 | pub mod ara; 14 | pub mod aze; 15 | pub mod bel; 16 | pub mod ben; 17 | pub mod bul; 18 | pub mod cat; 19 | pub mod ces; 20 | pub mod cmn; 21 | pub mod dan; 22 | pub mod deu; 23 | pub mod ell; 24 | pub mod eng; 25 | pub mod epo; 26 | pub mod est; 27 | pub mod fin; 28 | pub mod fra; 29 | pub mod guj; 30 | pub mod heb; 31 | pub mod hin; 32 | pub mod hrv; 33 | pub mod hun; 34 | pub mod hye; 35 | pub mod ind; 36 | pub mod ita; 37 | pub mod jav; 38 | pub mod jpn; 39 | pub mod kan; 40 | pub mod kat; 41 | pub mod khm; 42 | pub mod kor; 43 | pub mod lat; 44 | pub mod lav; 45 | pub mod lit; 46 | pub mod mal; 47 | pub mod mar; 48 | pub mod mkd; 49 | pub mod mya; 50 | pub mod nep; 51 | pub mod nld; 52 | pub mod nob; 53 | pub mod ori; 54 | pub mod pan; 55 | pub mod pes; 56 | pub mod pol; 57 | pub mod por; 58 | pub mod ron; 59 | pub mod rus; 60 | pub mod sin; 61 | pub mod slk; 62 | pub mod slv; 63 | pub mod sna; 64 | pub mod spa; 65 | pub mod srp; 66 | pub mod swe; 67 | pub mod tam; 68 | pub mod tel; 69 | pub mod tgl; 70 | pub mod tha; 71 | pub mod tuk; 72 | pub mod tur; 73 | pub mod ukr; 74 | pub mod urd; 75 | pub mod uzb; 76 | pub mod vie; 77 | pub mod yid; 78 | pub mod zul; 79 | -------------------------------------------------------------------------------- /src/stopwords/mya.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight 
and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_MYA: &[&str] = &[ 8 | "အပေါ်", 9 | "အနက်", 10 | "အမြဲတမ်း", 11 | "အတွင်းတွင်", 12 | "မကြာမီ", 13 | "မတိုင်မီ", 14 | "ဒါ့အပြင်", 15 | "အောက်မှာ", 16 | "အထဲမှာ", 17 | "ဘယ်တော့မျှ", 18 | "မကြာခဏ", 19 | "တော်တော်လေး", 20 | "စဉ်တွင်", 21 | "နှင့်အတူ", 22 | "နှင့်", 23 | "နှင့်တကွ", 24 | "ကျွန်တော်", 25 | "ကျွန်မ", 26 | "ငါ", 27 | "ကျုပ်", 28 | "ကျွနု်ပ်", 29 | "ကျနော်", 30 | "ကျမ", 31 | "သူ", 32 | "သူမ", 33 | "ထိုဟာ", 34 | "ထိုအရာ", 35 | "ဤအရာ", 36 | "ထို", 37 | "၄င်း", 38 | "ကျွန်တော်တို့", 39 | "ကျွန်မတို့", 40 | "ငါတို့", 41 | "ကျုပ်တို့", 42 | "ကျွနု်ပ်တို့", 43 | "ကျနော်တို့", 44 | "ကျမတို့", 45 | "သင်", 46 | "သင်တို့", 47 | "နင်တို့", 48 | "မင်း", 49 | "မင်းတို့", 50 | "သူတို့", 51 | "ကျွန်တော်အား", 52 | "ကျွန်တော်ကို", 53 | "ကျွန်မကို", 54 | "ငါကို", 55 | "ကျုပ်ကို", 56 | "ကျွနု်ပ်ကို", 57 | "သူ့ကို", 58 | "သူမကို", 59 | "ထိုအရာကို", 60 | "သင့်ကို", 61 | "သင်တို့ကို", 62 | "နင်တို့ကို", 63 | "မင်းကို", 64 | "မင်းတို့ကို", 65 | "ငါတို့ကို", 66 | "ကျုပ်တို့ကို", 67 | "ကျွနု်ပ်တို့ကို", 68 | "မိမိကိုယ်တိုင်", 69 | "မိမိဘာသာ", 70 | "မင်းကိုယ်တိုင်", 71 | "မင်းဘာသာ", 72 | "မင်းတို့ကိုယ်တိုင်", 73 | "မင်းတို့ဘာသာ", 74 | "သူကိုယ်တိုင်", 75 | "ကိုယ်တိုင်", 76 | "သူမကိုယ်တိုင်", 77 | "သူ့ဘာသာ", 78 | "သူ့ကိုယ်ကို", 79 | "ကိုယ့်ကိုယ်ကို", 80 | "မိမိကိုယ်ကို", 81 | "၄င်းပင်", 82 | "ထိုအရာပင်", 83 | "သည့်", 84 | "မည့်", 85 | "တဲ့", 86 | "ကျွနု်ပ်၏", 87 | "ကျွန်တော်၏", 88 | "ကျွန်မ၏", 89 | "ကျနော်၏", 90 | "ကျမ၏", 91 | "သူ၏", 92 | "သူမ၏", 93 | "ထိုအရာ၏", 94 | "ထိုဟာ၏", 95 | "ကျွနု်ပ်တို့၏", 96 | "ငါတို့၏", 97 | "ကျွန်တော်တို့၏", 98 | "ကျွန်မတို့၏", 99 | "ကျနော်တို့၏", 100 | "ကျမတို့၏", 101 | "သင်၏", 102 | "သင်တို့၏", 103 | "မင်း၏", 104 | "မင်းတို့၏", 105 | "သူတို့၏", 106 | "ကျွန်တော့်ဟာ", 107 | "ကျွန်မဟာ", 108 | "ကျနော်၏ဟာ", 109 | "ကျမ၏ဟာ", 110 | "ကျမဟာ", 111 | "ကျနော်ဟာ", 112 | "သူဟာ", 113 | "သူမဟာ", 114 | "သူ့ဟာ", 115 | 
"ကျွနု်ပ်တို့ဟာ", 116 | "ကျွန်တော်တို့ဟာ", 117 | "ကျွန်မတို့ဟာ", 118 | "သင်တို့ဟာ", 119 | "မင်းတို့ဟာ", 120 | "သူတို့ဟာ", 121 | "သူမတို့ဟာ", 122 | "ဤအရာ", 123 | "ဟောဒါ", 124 | "ဟောဒီ", 125 | "ဟောဒီဟာ", 126 | "ဒီဟာ", 127 | "ဒါ", 128 | "ထိုအရာ", 129 | "၄င်းအရာ", 130 | "ယင်းအရာ", 131 | "အဲဒါ", 132 | "ဟိုဟာ", 133 | "အချို့", 134 | "တစ်ခုခု", 135 | "အဘယ်မဆို", 136 | "ဘယ်အရာမဆို", 137 | "အဘယ်မည်သော", 138 | "အကြင်", 139 | "အရာရာတိုင်း", 140 | "စိုးစဉ်မျှ", 141 | "စိုးစဉ်းမျှ", 142 | "ဘယ်လောက်မဆို", 143 | "တစ်စုံတစ်ရာ", 144 | "တစုံတရာ", 145 | "အလျဉ်းမဟုတ်", 146 | "မည်သည့်နည်းနှင့်မျှမဟုတ်", 147 | "အလျဉ်းမရှိသော", 148 | "အခြားဖြစ်သော", 149 | "အခြားသော", 150 | "အခြားတစ်ခု", 151 | "အခြားတစ်ယောက်", 152 | "အားလုံး", 153 | "အရာရာတိုင်း", 154 | "အကုန်လုံး", 155 | "အလုံးစုံ", 156 | "အရာခပ်သိမ်း", 157 | "တစ်ခုစီ", 158 | "အသီးသီး", 159 | "တစ်ဦးဦး", 160 | "တစ်ခုခု", 161 | "ကိုယ်စီကိုယ်ငှ", 162 | "ကိုယ်စီ", 163 | "တစ်ဦးစီ", 164 | "တစ်ယောက်စီ", 165 | "တစ်ခုစီ", 166 | "အကုန်", 167 | "အပြည့်အစုံ", 168 | "လုံးလုံး", 169 | "နှစ်ခုလုံး", 170 | "နှစ်ယောက်လုံး", 171 | "နှစ်ဘက်လုံး", 172 | "တစ်စုံတစ်ရာ", 173 | "တစ်စုံတစ်ခု", 174 | "တစုံတခု", 175 | "တစ်စုံတစ်ယောက်", 176 | "တစုံတယောက်", 177 | "တစ်ယောက်ယောက်", 178 | "မည်သူမဆို", 179 | "ဘာမျှမရှိ", 180 | "ဘာမှမရှိ", 181 | "အဘယ်အရာမျှမရှိ", 182 | "လူတိုင်း", 183 | "လူတကာ", 184 | "နှင့်", 185 | "ပြီးလျှင်", 186 | "၄င်းနောက်", 187 | "သို့မဟုတ်", 188 | "သို့တည်းမဟုတ်", 189 | "သို့မဟုတ်လျှင်", 190 | "ဒါမှမဟုတ်", 191 | "ဖြစ်စေ", 192 | "သို့စေကာမူ", 193 | "ဒါပေမယ့်", 194 | "ဒါပေမဲ့", 195 | "မှတစ်ပါး", 196 | "မှလွဲလျှင်", 197 | "အဘယ်ကြောင့်ဆိုသော်", 198 | "သောကြောင့်", 199 | "သဖြင့်", 200 | "၍", 201 | "သည့်အတွက်ကြောင့်", 202 | "လျှင်", 203 | "ပါက", 204 | "အကယ်၍", 205 | "သော်ငြားလည်း", 206 | "စေကာမူ", 207 | "နည်းတူ", 208 | "ပေမယ့်", 209 | "ပေမဲ့", 210 | "ထိုနည်းတူစွာ", 211 | "ထိုနည်းတူ", 212 | "ကဲ့သို့", 213 | "သကဲ့သို့", 214 | "ယင်းကဲ့သို့", 215 | "ထိုကဲ့သို့", 216 | "နှင့်စပ်လျဉ်း၍", 217 | "ဤမျှ", 218 | "ဤမျှလောက်", 219 | "ဤကဲ့သို့", 220 | "အခုလောက်ထိ", 
221 | "ဒါကတော့", 222 | "အဘယ်ကဲ့သလို့", 223 | "မည်ကဲ့သို့", 224 | "မည်သည့်နည်းနှင့်", 225 | "မည်သည့်နည်းဖြင့်", 226 | "မည်သည့်နည့်နှင့်မဆို", 227 | "မည်သည့်နည်းဖြင့်မဆို", 228 | "မည်သို့", 229 | "ဘယ်လိုလဲ", 230 | "သို့ပေတည့်", 231 | "သို့ပေမည့်", 232 | "ဘယ်နည်းနှင့်", 233 | "မည်ရွေ့မည်မျှ", 234 | "အဘယ်မျှလောက်", 235 | "ဘယ်လောက်", 236 | "မည်သူ", 237 | "ဘယ်သူ", 238 | "မည်သည့်အကြောင်းကြောင့်", 239 | "ဘာအတွက်ကြောင့်", 240 | "အဘယ်ကြောင့်", 241 | "မည်သည့်အတွက်ကြောင့်", 242 | "ဘာကြောင့်", 243 | "ဘာအတွက်နဲ့လဲ", 244 | "မည်သည်", 245 | "ဘာလဲ", 246 | "အဘယ်အရာနည်း", 247 | "မည်သည့်အရပ်မှာ", 248 | "ဘယ်နေရာတွင်", 249 | "မည်သည့်နေရာတွင်", 250 | "မည်သည့်နေရာသို့", 251 | "ဘယ်နေရာသို့", 252 | "ဘယ်နေရာမှာ", 253 | "ဘယ်သူ၏", 254 | "မည်သည့်အရာ၏", 255 | "မည်သည့်အခါ", 256 | "ဘယ်အချိန်", 257 | "ဘယ်အခါ", 258 | "မည်သည့်အချိန်", 259 | "ဘယ်တော့", 260 | "မည်သူကို", 261 | "မည်သူက", 262 | "ဘယ်သူ့ကို", 263 | "မည်သူမည်ဝါ", 264 | "မည်သည့်အရာ", 265 | "ဘယ်အရာ", 266 | "မည်သို့ပင်ဖြစ်စေ", 267 | "ဘယ်လိုပဲဖြစ်ဖြစ်", 268 | "မည်ရွေ့မည်မျှဖြစ်စေ", 269 | "မည်သည့်နည်းနှင့်မဆို", 270 | "ဘယ်နည်းနဲ့ဖြစ်ဖြစ်", 271 | "မည်သူမဆို", 272 | "ဘယ်သူမဆို", 273 | "အဘယ်သူမဆို", 274 | "မည်သည့်အရာမဆို", 275 | "ဘာဖြစ်ဖြစ်", 276 | "မည်သည့်အရာဖြစ်ဖြစ်", 277 | "မည်သည့်အရပ်၌မဆို", 278 | "မည်သည့်နေရာမဆို", 279 | "ဘယ်အခါမဆို", 280 | "ဘယ်အချိန်မဆို", 281 | "ဘယ်အခါဖြစ်ဖြစ်", 282 | "အချိန်အခါမရွေး", 283 | ]; 284 | -------------------------------------------------------------------------------- /src/stopwords/nep.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_NEP: &[&str] = &[ 8 | "छ", 9 | "र", 10 | "पनि", 11 | "छन्", 12 | "लागि", 13 | "भएको", 14 | "गरेको", 15 | "भने", 16 | "गर्न", 17 | "गर्ने", 18 | "हो", 19 | "तथा", 20 | "यो", 21 | "रहेको", 22 | "उनले", 23 | "थियो", 24 | "हुने", 25 | "गरेका", 26 | "थिए", 
27 | "गर्दै", 28 | "तर", 29 | "नै", 30 | "को", 31 | "मा", 32 | "हुन्", 33 | "भन्ने", 34 | "हुन", 35 | "गरी", 36 | "त", 37 | "हुन्छ", 38 | "अब", 39 | "के", 40 | "रहेका", 41 | "गरेर", 42 | "छैन", 43 | "दिए", 44 | "भए", 45 | "यस", 46 | "ले", 47 | "गर्नु", 48 | "औं", 49 | "सो", 50 | "त्यो", 51 | "कि", 52 | "जुन", 53 | "यी", 54 | "का", 55 | "गरि", 56 | "ती", 57 | "न", 58 | "छु", 59 | "छौं", 60 | "लाई", 61 | "नि", 62 | "उप", 63 | "अक्सर", 64 | "आदि", 65 | "कसरी", 66 | "क्रमशः", 67 | "चाले", 68 | "अगाडी", 69 | "अझै", 70 | "अनुसार", 71 | "अन्तर्गत", 72 | "अन्य", 73 | "अन्यत्र", 74 | "अन्यथा", 75 | "अरु", 76 | "अरुलाई", 77 | "अर्को", 78 | "अर्थात", 79 | "अर्थात्", 80 | "अलग", 81 | "आए", 82 | "आजको", 83 | "ओठ", 84 | "आत्म", 85 | "आफू", 86 | "आफूलाई", 87 | "आफ्नै", 88 | "आफ्नो", 89 | "आयो", 90 | "उदाहरण", 91 | "उनको", 92 | "उहालाई", 93 | "एउटै", 94 | "एक", 95 | "एकदम", 96 | "कतै", 97 | "कम से कम", 98 | "कसै", 99 | "कसैले", 100 | "कहाँबाट", 101 | "कहिलेकाहीं", 102 | "का", 103 | "किन", 104 | "किनभने", 105 | "कुनै", 106 | "कुरा", 107 | "कृपया", 108 | "केही", 109 | "कोही", 110 | "गए", 111 | "गरौं", 112 | "गर्छ", 113 | "गर्छु", 114 | "गर्नुपर्छ", 115 | "गयौ", 116 | "गैर", 117 | "चार", 118 | "चाहनुहुन्छ", 119 | "चाहन्छु", 120 | "चाहिए", 121 | "छू", 122 | "जताततै", 123 | "जब", 124 | "जबकि", 125 | "जसको", 126 | "जसबाट", 127 | "जसमा", 128 | "जसलाई", 129 | "जसले", 130 | "जस्तै", 131 | "जस्तो", 132 | "जस्तोसुकै", 133 | "जहाँ", 134 | "जान", 135 | "जाहिर", 136 | "जे", 137 | "जो", 138 | "ठीक", 139 | "तत्काल", 140 | "तदनुसार", 141 | "तपाईको", 142 | "तपाई", 143 | "पर्याप्त", 144 | "पहिले", 145 | "पहिलो", 146 | "पहिल्यै", 147 | "पाँच", 148 | "पाँचौं", 149 | "तल", 150 | "तापनी", 151 | "तिनी", 152 | "तिनीहरू", 153 | "तिनीहरुको", 154 | "तिनिहरुलाई", 155 | "तिमी", 156 | "तिर", 157 | "तीन", 158 | "तुरुन्तै", 159 | "तेस्रो", 160 | "तेस्कारण", 161 | "पूर्व", 162 | "प्रति", 163 | "प्रतेक", 164 | "प्लस", 165 | "फेरी", 166 | "बने", 167 | "त्सपछि", 168 | "त्सैले", 169 | "त्यहाँ", 170 | "थिएन", 171 | 
"दिनुभएको", 172 | "दिनुहुन्छ", 173 | "दुई", 174 | "देखि", 175 | "बरु", 176 | "बारे", 177 | "बाहिर", 178 | "देखिन्छ", 179 | "देखियो", 180 | "देखे", 181 | "देखेको", 182 | "देखेर", 183 | "दोस्रो", 184 | "धेरै", 185 | "नजिकै", 186 | "नत्र", 187 | "नयाँ", 188 | "निम्ति", 189 | "बाहेक", 190 | "बीच", 191 | "बीचमा", 192 | "भन", 193 | "निम्न", 194 | "निम्नानुसार", 195 | "निर्दिष्ट", 196 | "नौ", 197 | "पक्का", 198 | "पक्कै", 199 | "पछि", 200 | "पछिल्लो", 201 | "पटक", 202 | "पर्छ", 203 | "पर्थ्यो", 204 | "भन्छन्", 205 | "भन्", 206 | "भन्छु", 207 | "भन्दा", 208 | "भन्नुभयो", 209 | "भर", 210 | "भित्र", 211 | "भित्री", 212 | "म", 213 | "मलाई", 214 | "मात्र", 215 | "माथि", 216 | "मुख्य", 217 | "मेरो", 218 | "यति", 219 | "यथोचित", 220 | "यदि", 221 | "यद्यपि", 222 | "यसको", 223 | "यसपछि", 224 | "यसबाहेक", 225 | "यसरी", 226 | "यसो", 227 | "यस्तो", 228 | "यहाँ", 229 | "यहाँसम्म", 230 | "या", 231 | "रही", 232 | "राखे", 233 | "राख्छ", 234 | "राम्रो", 235 | "रूप", 236 | "लगभग", 237 | "वरीपरी", 238 | "वास्तवमा", 239 | "बिरुद्ध", 240 | "बिशेष", 241 | "सायद", 242 | "शायद", 243 | "संग", 244 | "संगै", 245 | "सक्छ", 246 | "सट्टा", 247 | "सधै", 248 | "सबै", 249 | "सबैलाई", 250 | "समय", 251 | "सम्भव", 252 | "सम्म", 253 | "सही", 254 | "साँच्चै", 255 | "सात", 256 | "साथ", 257 | "साथै", 258 | "सारा", 259 | "सोही", 260 | "स्पष्ट", 261 | "हरे", 262 | "हरेक", 263 | ]; 264 | -------------------------------------------------------------------------------- /src/stopwords/nob.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_NOB: &[&str] = &[ 8 | "og", 9 | "i", 10 | "jeg", 11 | "det", 12 | "at", 13 | "en", 14 | "et", 15 | "den", 16 | "til", 17 | "er", 18 | "som", 19 | "på", 20 | "de", 21 | "med", 22 | "han", 23 | "av", 24 | "ikke", 25 | "ikkje", 26 | "der", 27 | "så", 28 | 
"var", 29 | "meg", 30 | "seg", 31 | "men", 32 | "ett", 33 | "har", 34 | "om", 35 | "vi", 36 | "min", 37 | "mitt", 38 | "ha", 39 | "hadde", 40 | "hun", 41 | "nå", 42 | "over", 43 | "da", 44 | "ved", 45 | "fra", 46 | "du", 47 | "ut", 48 | "sin", 49 | "dem", 50 | "oss", 51 | "opp", 52 | "man", 53 | "kan", 54 | "hans", 55 | "hvor", 56 | "eller", 57 | "hva", 58 | "skal", 59 | "selv", 60 | "sjøl", 61 | "her", 62 | "alle", 63 | "vil", 64 | "bli", 65 | "ble", 66 | "blei", 67 | "blitt", 68 | "kunne", 69 | "inn", 70 | "når", 71 | "være", 72 | "kom", 73 | "noen", 74 | "noe", 75 | "ville", 76 | "dere", 77 | "som", 78 | "deres", 79 | "kun", 80 | "ja", 81 | "etter", 82 | "ned", 83 | "skulle", 84 | "denne", 85 | "for", 86 | "deg", 87 | "si", 88 | "sine", 89 | "sitt", 90 | "mot", 91 | "å", 92 | "meget", 93 | "hvorfor", 94 | "dette", 95 | "disse", 96 | "uten", 97 | "hvordan", 98 | "ingen", 99 | "din", 100 | "ditt", 101 | "blir", 102 | "samme", 103 | "hvilken", 104 | "hvilke", 105 | "sånn", 106 | "inni", 107 | "mellom", 108 | "vår", 109 | "hver", 110 | "hvem", 111 | "vors", 112 | "hvis", 113 | "både", 114 | "bare", 115 | "enn", 116 | "fordi", 117 | "før", 118 | "mange", 119 | "også", 120 | "slik", 121 | "vært", 122 | "være", 123 | "båe", 124 | "begge", 125 | "siden", 126 | "dykk", 127 | "dykkar", 128 | "dei", 129 | "deira", 130 | "deires", 131 | "deim", 132 | "di", 133 | "då", 134 | "eg", 135 | "ein", 136 | "eit", 137 | "eitt", 138 | "elles", 139 | "honom", 140 | "hjå", 141 | "ho", 142 | "hoe", 143 | "henne", 144 | "hennar", 145 | "hennes", 146 | "hoss", 147 | "hossen", 148 | "ikkje", 149 | "ingi", 150 | "inkje", 151 | "korleis", 152 | "korso", 153 | "kva", 154 | "kvar", 155 | "kvarhelst", 156 | "kven", 157 | "kvi", 158 | "kvifor", 159 | "me", 160 | "medan", 161 | "mi", 162 | "mine", 163 | "mykje", 164 | "no", 165 | "nokon", 166 | "noka", 167 | "nokor", 168 | "noko", 169 | "nokre", 170 | "si", 171 | "sia", 172 | "sidan", 173 | "so", 174 | "somt", 175 | "somme", 176 | "um", 177 | 
"upp", 178 | "vere", 179 | "vore", 180 | "verte", 181 | "vort", 182 | "varte", 183 | "vart", 184 | ]; 185 | -------------------------------------------------------------------------------- /src/stopwords/ori.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | // Notice: we do not have stopwords for this language yet. 8 | pub static STOPWORDS_ORI: &[&str] = &[]; 9 | -------------------------------------------------------------------------------- /src/stopwords/pol.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_POL: &[&str] = &[ 8 | "a", 9 | "aby", 10 | "ach", 11 | "acz", 12 | "aczkolwiek", 13 | "aj", 14 | "albo", 15 | "ale", 16 | "ależ", 17 | "ani", 18 | "aż", 19 | "bardziej", 20 | "bardzo", 21 | "bez", 22 | "bo", 23 | "bowiem", 24 | "by", 25 | "byli", 26 | "bym", 27 | "bynajmniej", 28 | "być", 29 | "był", 30 | "była", 31 | "było", 32 | "były", 33 | "będzie", 34 | "będą", 35 | "cali", 36 | "cała", 37 | "cały", 38 | "chce", 39 | "choć", 40 | "ci", 41 | "ciebie", 42 | "cię", 43 | "co", 44 | "cokolwiek", 45 | "coraz", 46 | "coś", 47 | "czasami", 48 | "czasem", 49 | "czemu", 50 | "czy", 51 | "czyli", 52 | "często", 53 | "daleko", 54 | "dla", 55 | "dlaczego", 56 | "dlatego", 57 | "do", 58 | "dobrze", 59 | "dokąd", 60 | "dość", 61 | "dr", 62 | "dużo", 63 | "dwa", 64 | "dwaj", 65 | "dwie", 66 | "dwoje", 67 | "dzisiaj", 68 | "dziś", 69 | "gdy", 70 | "gdyby", 71 | "gdyż", 72 | "gdzie", 73 | "gdziekolwiek", 74 | "gdzieś", 75 | "go", 76 | "godz", 77 | "hab", 78 | "i", 79 | "ich", 80 | "ii", 81 | "iii", 82 | "ile", 83 | "im", 84 | 
"inna", 85 | "inne", 86 | "inny", 87 | "innych", 88 | "inż", 89 | "iv", 90 | "ix", 91 | "iż", 92 | "ja", 93 | "jak", 94 | "jakaś", 95 | "jakby", 96 | "jaki", 97 | "jakichś", 98 | "jakie", 99 | "jakiś", 100 | "jakiż", 101 | "jakkolwiek", 102 | "jako", 103 | "jakoś", 104 | "je", 105 | "jeden", 106 | "jedna", 107 | "jednak", 108 | "jednakże", 109 | "jedno", 110 | "jednym", 111 | "jedynie", 112 | "jego", 113 | "jej", 114 | "jemu", 115 | "jest", 116 | "jestem", 117 | "jeszcze", 118 | "jeśli", 119 | "jeżeli", 120 | "już", 121 | "ją", 122 | "każdy", 123 | "kiedy", 124 | "kierunku", 125 | "kilka", 126 | "kilku", 127 | "kimś", 128 | "kto", 129 | "ktokolwiek", 130 | "ktoś", 131 | "która", 132 | "które", 133 | "którego", 134 | "której", 135 | "który", 136 | "których", 137 | "którym", 138 | "którzy", 139 | "ku", 140 | "lat", 141 | "lecz", 142 | "lub", 143 | "ma", 144 | "mają", 145 | "mam", 146 | "mamy", 147 | "mało", 148 | "mgr", 149 | "mi", 150 | "miał", 151 | "mimo", 152 | "między", 153 | "mnie", 154 | "mną", 155 | "mogą", 156 | "moi", 157 | "moim", 158 | "moja", 159 | "moje", 160 | "może", 161 | "możliwe", 162 | "można", 163 | "mu", 164 | "musi", 165 | "my", 166 | "mój", 167 | "na", 168 | "nad", 169 | "nam", 170 | "nami", 171 | "nas", 172 | "nasi", 173 | "nasz", 174 | "nasza", 175 | "nasze", 176 | "naszego", 177 | "naszych", 178 | "natomiast", 179 | "natychmiast", 180 | "nawet", 181 | "nic", 182 | "nich", 183 | "nie", 184 | "niech", 185 | "niego", 186 | "niej", 187 | "niemu", 188 | "nigdy", 189 | "nim", 190 | "nimi", 191 | "nią", 192 | "niż", 193 | "no", 194 | "nowe", 195 | "np", 196 | "nr", 197 | "o", 198 | "o.o.", 199 | "obok", 200 | "od", 201 | "ok", 202 | "około", 203 | "on", 204 | "ona", 205 | "one", 206 | "oni", 207 | "ono", 208 | "oraz", 209 | "oto", 210 | "owszem", 211 | "pan", 212 | "pana", 213 | "pani", 214 | "pl", 215 | "po", 216 | "pod", 217 | "podczas", 218 | "pomimo", 219 | "ponad", 220 | "ponieważ", 221 | "powinien", 222 | "powinna", 223 | "powinni", 224 | 
"powinno", 225 | "poza", 226 | "prawie", 227 | "prof", 228 | "przecież", 229 | "przed", 230 | "przede", 231 | "przedtem", 232 | "przez", 233 | "przy", 234 | "raz", 235 | "razie", 236 | "roku", 237 | "również", 238 | "sam", 239 | "sama", 240 | "się", 241 | "skąd", 242 | "sobie", 243 | "sobą", 244 | "sposób", 245 | "swoje", 246 | "są", 247 | "ta", 248 | "tak", 249 | "taka", 250 | "taki", 251 | "takich", 252 | "takie", 253 | "także", 254 | "tam", 255 | "te", 256 | "tego", 257 | "tej", 258 | "tel", 259 | "temu", 260 | "ten", 261 | "teraz", 262 | "też", 263 | "to", 264 | "tobie", 265 | "tobą", 266 | "toteż", 267 | "trzeba", 268 | "tu", 269 | "tutaj", 270 | "twoi", 271 | "twoim", 272 | "twoja", 273 | "twoje", 274 | "twym", 275 | "twój", 276 | "ty", 277 | "tych", 278 | "tylko", 279 | "tym", 280 | "tys", 281 | "tzw", 282 | "tę", 283 | "u", 284 | "ul", 285 | "vi", 286 | "vii", 287 | "viii", 288 | "vol", 289 | "w", 290 | "wam", 291 | "wami", 292 | "was", 293 | "wasi", 294 | "wasz", 295 | "wasza", 296 | "wasze", 297 | "we", 298 | "według", 299 | "wie", 300 | "wiele", 301 | "wielu", 302 | "więc", 303 | "więcej", 304 | "wszyscy", 305 | "wszystkich", 306 | "wszystkie", 307 | "wszystkim", 308 | "wszystko", 309 | "wtedy", 310 | "www", 311 | "wy", 312 | "właśnie", 313 | "wśród", 314 | "xi", 315 | "xii", 316 | "xiii", 317 | "xiv", 318 | "xv", 319 | "z", 320 | "za", 321 | "zapewne", 322 | "zawsze", 323 | "zaś", 324 | "ze", 325 | "zeznowu", 326 | "znowu", 327 | "znów", 328 | "został", 329 | "zł", 330 | "żaden", 331 | "żadna", 332 | "żadne", 333 | "żadnych", 334 | "że", 335 | "żeby", 336 | ]; 337 | -------------------------------------------------------------------------------- /src/stopwords/sin.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | // Notice: we do not have 
stopwords for this language yet. 8 | pub static STOPWORDS_SIN: &[&str] = &[]; 9 | -------------------------------------------------------------------------------- /src/stopwords/slk.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2020, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_SLK: &[&str] = &[ 8 | "a", 9 | "aby", 10 | "aj", 11 | "ak", 12 | "akej", 13 | "akejže", 14 | "ako", 15 | "akom", 16 | "akomže", 17 | "akou", 18 | "akouže", 19 | "akože", 20 | "aká", 21 | "akáže", 22 | "aké", 23 | "akého", 24 | "akéhože", 25 | "akému", 26 | "akémuže", 27 | "akéže", 28 | "akú", 29 | "akúže", 30 | "aký", 31 | "akých", 32 | "akýchže", 33 | "akým", 34 | "akými", 35 | "akýmiže", 36 | "akýmže", 37 | "akýže", 38 | "ale", 39 | "alebo", 40 | "ani", 41 | "asi", 42 | "avšak", 43 | "až", 44 | "ba", 45 | "bez", 46 | "bezo", 47 | "bol", 48 | "bola", 49 | "boli", 50 | "bolo", 51 | "bude", 52 | "budem", 53 | "budeme", 54 | "budete", 55 | "budeš", 56 | "budú", 57 | "buď", 58 | "by", 59 | "byť", 60 | "cez", 61 | "cezo", 62 | "dnes", 63 | "do", 64 | "ešte", 65 | "ho", 66 | "hoci", 67 | "i", 68 | "iba", 69 | "ich", 70 | "im", 71 | "inej", 72 | "inom", 73 | "iná", 74 | "iné", 75 | "iného", 76 | "inému", 77 | "iní", 78 | "inú", 79 | "iný", 80 | "iných", 81 | "iným", 82 | "inými", 83 | "ja", 84 | "je", 85 | "jeho", 86 | "jej", 87 | "jemu", 88 | "ju", 89 | "k", 90 | "kam", 91 | "kamže", 92 | "každou", 93 | "každá", 94 | "každé", 95 | "každého", 96 | "každému", 97 | "každí", 98 | "každú", 99 | "každý", 100 | "každých", 101 | "každým", 102 | "každými", 103 | "kde", 104 | "kej", 105 | "kejže", 106 | "keď", 107 | "keďže", 108 | "kie", 109 | "kieho", 110 | "kiehože", 111 | "kiemu", 112 | "kiemuže", 113 | "kieže", 114 | "koho", 115 | "kom", 116 | "komu", 117 | "kou", 118 | "kouže", 119 | "kto", 120 | "ktorej", 121 | 
"ktorou", 122 | "ktorá", 123 | "ktoré", 124 | "ktorí", 125 | "ktorú", 126 | "ktorý", 127 | "ktorých", 128 | "ktorým", 129 | "ktorými", 130 | "ku", 131 | "ká", 132 | "káže", 133 | "ké", 134 | "kéže", 135 | "kú", 136 | "kúže", 137 | "ký", 138 | "kýho", 139 | "kýhože", 140 | "kým", 141 | "kýmu", 142 | "kýmuže", 143 | "kýže", 144 | "lebo", 145 | "leda", 146 | "ledaže", 147 | "len", 148 | "ma", 149 | "majú", 150 | "mal", 151 | "mala", 152 | "mali", 153 | "mať", 154 | "medzi", 155 | "mi", 156 | "mne", 157 | "mnou", 158 | "moja", 159 | "moje", 160 | "mojej", 161 | "mojich", 162 | "mojim", 163 | "mojimi", 164 | "mojou", 165 | "moju", 166 | "možno", 167 | "mu", 168 | "musia", 169 | "musieť", 170 | "musí", 171 | "musím", 172 | "musíme", 173 | "musíte", 174 | "musíš", 175 | "my", 176 | "má", 177 | "mám", 178 | "máme", 179 | "máte", 180 | "máš", 181 | "môcť", 182 | "môj", 183 | "môjho", 184 | "môže", 185 | "môžem", 186 | "môžeme", 187 | "môžete", 188 | "môžeš", 189 | "môžu", 190 | "mňa", 191 | "na", 192 | "nad", 193 | "nado", 194 | "najmä", 195 | "nami", 196 | "naša", 197 | "naše", 198 | "našej", 199 | "naši", 200 | "našich", 201 | "našim", 202 | "našimi", 203 | "našou", 204 | "ne", 205 | "nech", 206 | "neho", 207 | "nej", 208 | "nejakej", 209 | "nejakom", 210 | "nejakou", 211 | "nejaká", 212 | "nejaké", 213 | "nejakého", 214 | "nejakému", 215 | "nejakú", 216 | "nejaký", 217 | "nejakých", 218 | "nejakým", 219 | "nejakými", 220 | "nemu", 221 | "než", 222 | "nich", 223 | "nie", 224 | "niektorej", 225 | "niektorom", 226 | "niektorou", 227 | "niektorá", 228 | "niektoré", 229 | "niektorého", 230 | "niektorému", 231 | "niektorú", 232 | "niektorý", 233 | "niektorých", 234 | "niektorým", 235 | "niektorými", 236 | "nielen", 237 | "niečo", 238 | "nim", 239 | "nimi", 240 | "nič", 241 | "ničoho", 242 | "ničom", 243 | "ničomu", 244 | "ničím", 245 | "no", 246 | "nám", 247 | "nás", 248 | "náš", 249 | "nášho", 250 | "ním", 251 | "o", 252 | "od", 253 | "odo", 254 | "on", 255 | "ona", 256 | 
"oni", 257 | "ono", 258 | "ony", 259 | "oň", 260 | "oňho", 261 | "po", 262 | "pod", 263 | "podo", 264 | "podľa", 265 | "pokiaľ", 266 | "popod", 267 | "popri", 268 | "potom", 269 | "poza", 270 | "pre", 271 | "pred", 272 | "predo", 273 | "preto", 274 | "pretože", 275 | "prečo", 276 | "pri", 277 | "práve", 278 | "s", 279 | "sa", 280 | "seba", 281 | "sebe", 282 | "sebou", 283 | "sem", 284 | "si", 285 | "sme", 286 | "so", 287 | "som", 288 | "ste", 289 | "svoj", 290 | "svoja", 291 | "svoje", 292 | "svojho", 293 | "svojich", 294 | "svojim", 295 | "svojimi", 296 | "svojou", 297 | "svoju", 298 | "svojím", 299 | "sú", 300 | "ta", 301 | "tak", 302 | "takej", 303 | "takejto", 304 | "taká", 305 | "takáto", 306 | "také", 307 | "takého", 308 | "takéhoto", 309 | "takému", 310 | "takémuto", 311 | "takéto", 312 | "takí", 313 | "takú", 314 | "takúto", 315 | "taký", 316 | "takýto", 317 | "takže", 318 | "tam", 319 | "teba", 320 | "tebe", 321 | "tebou", 322 | "teda", 323 | "tej", 324 | "tejto", 325 | "ten", 326 | "tento", 327 | "ti", 328 | "tie", 329 | "tieto", 330 | "tiež", 331 | "to", 332 | "toho", 333 | "tohoto", 334 | "tohto", 335 | "tom", 336 | "tomto", 337 | "tomu", 338 | "tomuto", 339 | "toto", 340 | "tou", 341 | "touto", 342 | "tu", 343 | "tvoj", 344 | "tvoja", 345 | "tvoje", 346 | "tvojej", 347 | "tvojho", 348 | "tvoji", 349 | "tvojich", 350 | "tvojim", 351 | "tvojimi", 352 | "tvojím", 353 | "ty", 354 | "tá", 355 | "táto", 356 | "tí", 357 | "títo", 358 | "tú", 359 | "túto", 360 | "tých", 361 | "tým", 362 | "tými", 363 | "týmto", 364 | "u", 365 | "už", 366 | "v", 367 | "vami", 368 | "vaša", 369 | "vaše", 370 | "vašej", 371 | "vaši", 372 | "vašich", 373 | "vašim", 374 | "vaším", 375 | "veď", 376 | "viac", 377 | "vo", 378 | "vy", 379 | "vám", 380 | "vás", 381 | "váš", 382 | "vášho", 383 | "však", 384 | "všetci", 385 | "všetka", 386 | "všetko", 387 | "všetky", 388 | "všetok", 389 | "z", 390 | "za", 391 | "začo", 392 | "začože", 393 | "zo", 394 | "áno", 395 | "čej", 396 | "či", 397 
| "čia", 398 | "čie", 399 | "čieho", 400 | "čiemu", 401 | "čiu", 402 | "čo", 403 | "čoho", 404 | "čom", 405 | "čomu", 406 | "čou", 407 | "čože", 408 | "čí", 409 | "čím", 410 | "čími", 411 | "ďalšia", 412 | "ďalšie", 413 | "ďalšieho", 414 | "ďalšiemu", 415 | "ďalšiu", 416 | "ďalšom", 417 | "ďalšou", 418 | "ďalší", 419 | "ďalších", 420 | "ďalším", 421 | "ďalšími", 422 | "ňom", 423 | "ňou", 424 | "ňu", 425 | "že", 426 | ]; 427 | -------------------------------------------------------------------------------- /src/stopwords/sna.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | // Notice: we do not have stopwords for this language yet. 8 | pub static STOPWORDS_SNA: &[&str] = &[]; 9 | -------------------------------------------------------------------------------- /src/stopwords/srp.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_SRP: &[&str] = &[ 8 | "a", 9 | "avaj", 10 | "ako", 11 | "al", 12 | "ali", 13 | "arh", 14 | "au", 15 | "ah", 16 | "aha", 17 | "aj", 18 | "bar", 19 | "bi", 20 | "bila", 21 | "bili", 22 | "bilo", 23 | "bismo", 24 | "biste", 25 | "bih", 26 | "bijasmo", 27 | "bijaste", 28 | "bijah", 29 | "bijahu", 30 | "bijaše", 31 | "biće", 32 | "blizu", 33 | "broj", 34 | "brr", 35 | "bude", 36 | "budimo", 37 | "budite", 38 | "budu", 39 | "budući", 40 | "bum", 41 | "buć", 42 | "vam", 43 | "vama", 44 | "vas", 45 | "vaša", 46 | "vaše", 47 | "vašim", 48 | "vašima", 49 | "valjda", 50 | "veoma", 51 | "verovatno", 52 | "već", 53 | "većina", 54 | "vi", 55 | "video", 56 | "više", 57 | "vrlo", 58 | "vrh", 59 | "ga", 60 | "gde", 61 | 
"gic", 62 | "god", 63 | "gore", 64 | "gđekoje", 65 | "da", 66 | "dakle", 67 | "dana", 68 | "danas", 69 | "daj", 70 | "dva", 71 | "de", 72 | "deder", 73 | "delimice", 74 | "delimično", 75 | "dem", 76 | "do", 77 | "dobar", 78 | "dobiti", 79 | "dovečer", 80 | "dokle", 81 | "dole", 82 | "donekle", 83 | "dosad", 84 | "doskoro", 85 | "dotad", 86 | "dotle", 87 | "došao", 88 | "doći", 89 | "drugamo", 90 | "drugde", 91 | "drugi", 92 | "e", 93 | "evo", 94 | "eno", 95 | "eto", 96 | "eh", 97 | "ehe", 98 | "ej", 99 | "želela", 100 | "želele", 101 | "želeli", 102 | "želelo", 103 | "želeh", 104 | "želeći", 105 | "želi", 106 | "za", 107 | "zaista", 108 | "zar", 109 | "zatim", 110 | "zato", 111 | "zahvaliti", 112 | "zašto", 113 | "zbilja", 114 | "zimus", 115 | "znati", 116 | "zum", 117 | "i", 118 | "ide", 119 | "iz", 120 | "izvan", 121 | "izvoli", 122 | "između", 123 | "iznad", 124 | "ikada", 125 | "ikakav", 126 | "ikakva", 127 | "ikakve", 128 | "ikakvi", 129 | "ikakvim", 130 | "ikakvima", 131 | "ikakvih", 132 | "ikakvo", 133 | "ikakvog", 134 | "ikakvoga", 135 | "ikakvom", 136 | "ikakvome", 137 | "ikakvoj", 138 | "ili", 139 | "im", 140 | "ima", 141 | "imam", 142 | "imao", 143 | "ispod", 144 | "ih", 145 | "iju", 146 | "ići", 147 | "kad", 148 | "kada", 149 | "koga", 150 | "kojekakav", 151 | "kojima", 152 | "koju", 153 | "krišom", 154 | "lani", 155 | "li", 156 | "mali", 157 | "manji", 158 | "me", 159 | "mene", 160 | "meni", 161 | "mi", 162 | "mimo", 163 | "misli", 164 | "mnogo", 165 | "mogu", 166 | "mora", 167 | "morao", 168 | "moj", 169 | "moja", 170 | "moje", 171 | "moji", 172 | "moju", 173 | "moći", 174 | "mu", 175 | "na", 176 | "nad", 177 | "nakon", 178 | "nam", 179 | "nama", 180 | "nas", 181 | "naša", 182 | "naše", 183 | "našeg", 184 | "naši", 185 | "naći", 186 | "ne", 187 | "negde", 188 | "neka", 189 | "nekad", 190 | "neke", 191 | "nekog", 192 | "neku", 193 | "nema", 194 | "nemam", 195 | "neko", 196 | "neće", 197 | "nećemo", 198 | "nećete", 199 | "nećeš", 200 | "neću", 201 | 
"ni", 202 | "nikada", 203 | "nikoga", 204 | "nikoje", 205 | "nikoji", 206 | "nikoju", 207 | "nisam", 208 | "nisi", 209 | "niste", 210 | "nisu", 211 | "ništa", 212 | "nijedan", 213 | "no", 214 | "o", 215 | "ova", 216 | "ovako", 217 | "ovamo", 218 | "ovaj", 219 | "ovde", 220 | "ove", 221 | "ovim", 222 | "ovima", 223 | "ovo", 224 | "ovoj", 225 | "od", 226 | "odmah", 227 | "oko", 228 | "okolo", 229 | "on", 230 | "onaj", 231 | "one", 232 | "onim", 233 | "onima", 234 | "onom", 235 | "onoj", 236 | "onu", 237 | "osim", 238 | "ostali", 239 | "otišao", 240 | "pa", 241 | "pak", 242 | "pitati", 243 | "po", 244 | "povodom", 245 | "pod", 246 | "podalje", 247 | "poželjan", 248 | "poželjna", 249 | "poizdalje", 250 | "poimence", 251 | "ponekad", 252 | "popreko", 253 | "pored", 254 | "posle", 255 | "potaman", 256 | "potrbuške", 257 | "pouzdano", 258 | "početak", 259 | "pojedini", 260 | "praviti", 261 | "prvi", 262 | "preko", 263 | "prema", 264 | "prije", 265 | "put", 266 | "pljus", 267 | "radije", 268 | "s", 269 | "sa", 270 | "sav", 271 | "sada", 272 | "sam", 273 | "samo", 274 | "sasvim", 275 | "sva", 276 | "svaki", 277 | "svi", 278 | "svim", 279 | "svog", 280 | "svom", 281 | "svoj", 282 | "svoja", 283 | "svoje", 284 | "svoju", 285 | "svu", 286 | "svugde", 287 | "se", 288 | "sebe", 289 | "sebi", 290 | "si", 291 | "smeti", 292 | "smo", 293 | "stvar", 294 | "stvarno", 295 | "ste", 296 | "su", 297 | "sutra", 298 | "ta", 299 | "tačno", 300 | "tako", 301 | "takođe", 302 | "tamo", 303 | "tvoj", 304 | "tvoja", 305 | "tvoje", 306 | "tvoji", 307 | "tvoju", 308 | "te", 309 | "tebe", 310 | "tebi", 311 | "ti", 312 | "tima", 313 | "to", 314 | "tome", 315 | "toj", 316 | "tu", 317 | "u", 318 | "uvek", 319 | "uvijek", 320 | "uz", 321 | "uza", 322 | "uzalud", 323 | "uzduž", 324 | "uzeti", 325 | "umalo", 326 | "unutra", 327 | "upotrebiti", 328 | "uprkos", 329 | "učinio", 330 | "učiniti", 331 | "halo", 332 | "hvala", 333 | "hej", 334 | "hm", 335 | "hop", 336 | "hoće", 337 | "hoćemo", 338 | "hoćete", 
339 | "hoćeš", 340 | "hoću", 341 | "htedoste", 342 | "htedoh", 343 | "htedoše", 344 | "htela", 345 | "htele", 346 | "hteli", 347 | "hteo", 348 | "htejasmo", 349 | "htejaste", 350 | "htejahu", 351 | "hura", 352 | "često", 353 | "čijem", 354 | "čiji", 355 | "čijim", 356 | "čijima", 357 | "šic", 358 | "štagod", 359 | "što", 360 | "štogod", 361 | "ja", 362 | "je", 363 | "jedan", 364 | "jedini", 365 | "jedna", 366 | "jedne", 367 | "jedni", 368 | "jedno", 369 | "jednom", 370 | "jer", 371 | "jesam", 372 | "jesi", 373 | "jesmo", 374 | "jesu", 375 | "jim", 376 | "joj", 377 | "ju", 378 | "juče", 379 | "njegova", 380 | "njegovo", 381 | "njezin", 382 | "njezina", 383 | "njezino", 384 | "njemu", 385 | "njen", 386 | "njim", 387 | "njima", 388 | "njihova", 389 | "njihovo", 390 | "njoj", 391 | "nju", 392 | "će", 393 | "ćemo", 394 | "ćete", 395 | "ćeš", 396 | "ću", 397 | ]; 398 | -------------------------------------------------------------------------------- /src/stopwords/tam.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_TAM: &[&str] = &[ 8 | "ஒரு", 9 | "என்று", 10 | "மற்றும்", 11 | "இந்த", 12 | "இது", 13 | "என்ற", 14 | "கொண்டு", 15 | "என்பது", 16 | "பல", 17 | "ஆகும்", 18 | "அல்லது", 19 | "அவர்", 20 | "நான்", 21 | "உள்ள", 22 | "அந்த", 23 | "இவர்", 24 | "என", 25 | "முதல்", 26 | "என்ன", 27 | "இருந்து", 28 | "சில", 29 | "என்", 30 | "போன்ற", 31 | "வேண்டும்", 32 | "வந்து", 33 | "இதன்", 34 | "அது", 35 | "அவன்", 36 | "தான்", 37 | "பலரும்", 38 | "என்னும்", 39 | "மேலும்", 40 | "பின்னர்", 41 | "கொண்ட", 42 | "இருக்கும்", 43 | "தனது", 44 | "உள்ளது", 45 | "போது", 46 | "என்றும்", 47 | "அதன்", 48 | "தன்", 49 | "பிறகு", 50 | "அவர்கள்", 51 | "வரை", 52 | "அவள்", 53 | "நீ", 54 | "ஆகிய", 55 | "இருந்தது", 56 | "உள்ளன", 57 | "வந்த", 58 | "இருந்த", 59 | "மிகவும்", 60 
| "இங்கு", 61 | "மீது", 62 | "ஓர்", 63 | "இவை", 64 | "இந்தக்", 65 | "பற்றி", 66 | "வரும்", 67 | "வேறு", 68 | "இரு", 69 | "இதில்", 70 | "போல்", 71 | "இப்போது", 72 | "அவரது", 73 | "மட்டும்", 74 | "இந்தப்", 75 | "எனும்", 76 | "மேல்", 77 | "பின்", 78 | "சேர்ந்த", 79 | "ஆகியோர்", 80 | "எனக்கு", 81 | "இன்னும்", 82 | "அந்தப்", 83 | "அன்று", 84 | "ஒரே", 85 | "மிக", 86 | "அங்கு", 87 | "பல்வேறு", 88 | "விட்டு", 89 | "பெரும்", 90 | "அதை", 91 | "பற்றிய", 92 | "உன்", 93 | "அதிக", 94 | "அந்தக்", 95 | "பேர்", 96 | "இதனால்", 97 | "அவை", 98 | "அதே", 99 | "ஏன்", 100 | "முறை", 101 | "யார்", 102 | "என்பதை", 103 | "எல்லாம்", 104 | "மட்டுமே", 105 | "இங்கே", 106 | "அங்கே", 107 | "இடம்", 108 | "இடத்தில்", 109 | "அதில்", 110 | "நாம்", 111 | "அதற்கு", 112 | "எனவே", 113 | "பிற", 114 | "சிறு", 115 | "மற்ற", 116 | "விட", 117 | "எந்த", 118 | "எனவும்", 119 | "எனப்படும்", 120 | "எனினும்", 121 | "அடுத்த", 122 | "இதனை", 123 | "இதை", 124 | "கொள்ள", 125 | "இந்தத்", 126 | "இதற்கு", 127 | "அதனால்", 128 | "தவிர", 129 | "போல", 130 | "வரையில்", 131 | "சற்று", 132 | "எனக்", 133 | ]; 134 | -------------------------------------------------------------------------------- /src/stopwords/tel.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | // Notice: we do not have stopwords for this language yet. 
/// Stopword table registered under the `tel` language code.
///
/// Intentionally empty: no stopwords are available for this language yet.
pub static STOPWORDS_TEL: &[&str] = &[];
116 | "nila", 117 | "nilang", 118 | "nito", 119 | "niya", 120 | "niyang", 121 | "noon", 122 | "o", 123 | "pa", 124 | "paano", 125 | "pababa", 126 | "paggawa", 127 | "pagitan", 128 | "pagkakaroon", 129 | "pagkatapos", 130 | "palabas", 131 | "pamamagitan", 132 | "panahon", 133 | "pangalawa", 134 | "para", 135 | "paraan", 136 | "pareho", 137 | "pataas", 138 | "pero", 139 | "pumunta", 140 | "pumupunta", 141 | "sa", 142 | "saan", 143 | "sabi", 144 | "sabihin", 145 | "sarili", 146 | "sila", 147 | "sino", 148 | "siya", 149 | "tatlo", 150 | "tayo", 151 | "tulad", 152 | "tungkol", 153 | "una", 154 | "walang", 155 | ]; 156 | -------------------------------------------------------------------------------- /src/stopwords/tha.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_THA: &[&str] = &[ 8 | "กล่าว", 9 | "กว่า", 10 | "กัน", 11 | "กับ", 12 | "การ", 13 | "ก็", 14 | "ก่อน", 15 | "ขณะ", 16 | "ขอ", 17 | "ของ", 18 | "ขึ้น", 19 | "คง", 20 | "ครั้ง", 21 | "ความ", 22 | "คือ", 23 | "จะ", 24 | "จัด", 25 | "จาก", 26 | "จึง", 27 | "ช่วง", 28 | "ซึ่ง", 29 | "ดัง", 30 | "ด้วย", 31 | "ด้าน", 32 | "ตั้ง", 33 | "ตั้งแต่", 34 | "ตาม", 35 | "ต่อ", 36 | "ต่าง", 37 | "ต่างๆ", 38 | "ต้อง", 39 | "ถึง", 40 | "ถูก", 41 | "ถ้า", 42 | "ทั้ง", 43 | "ทั้งนี้", 44 | "ทาง", 45 | "ทำ", 46 | "ทำให้", 47 | "ที่", 48 | "ที่สุด", 49 | "ทุก", 50 | "นอกจาก", 51 | "นัก", 52 | "นั้น", 53 | "นำ", 54 | "นี้", 55 | "น่า", 56 | "บาง", 57 | "ผล", 58 | "ผ่าน", 59 | "พบ", 60 | "พร้อม", 61 | "มา", 62 | "มาก", 63 | "มี", 64 | "ยัง", 65 | "รวม", 66 | "ระหว่าง", 67 | "รับ", 68 | "ราย", 69 | "ร่วม", 70 | "ลง", 71 | "วัน", 72 | "ว่า", 73 | "สำหรับ", 74 | "สุด", 75 | "ส่ง", 76 | "ส่วน", 77 | "หนึ่ง", 78 | "หรือ", 79 | "หลัง", 80 | "หลังจาก", 81 | "หลาย", 82 | "หาก", 83 | "อยาก", 84 | "อยู่", 85 | 
/// Stopword table registered under the `ukr` language code.
///
/// Every entry must be spelled with real Cyrillic letters. The letters `і`, `ї` and `є` are
/// easily corrupted into box-drawing glyphs (`╡`, `╞`, `╙`) when a KOI8-U source is decoded
/// as KOI8-R; such corrupted entries can never match tokenized text.
pub static STOPWORDS_UKR: &[&str] = &[
    "але",
    "ви",
    "вона",
    "вони",
    "воно",
    "він",
    "від",
    "з",
    "й",
    "коли",
    "ми",
    "нам",
    "про",
    "та",
    "ти",
    "хоча",
    "це",
    "цей",
    "чи",
    "чого",
    "що",
    "як",
    "якої",
    "із",
    "інших",
    "є",
    "їх",
    "і",
];
/// Stopword table registered under the `uzb` language code.
///
/// Intentionally empty: no stopwords are available for this language yet.
pub static STOPWORDS_UZB: &[&str] = &[];
| "כמו", 111 | "כן", 112 | "כפי", 113 | "כש", 114 | "לא", 115 | "לאו", 116 | "לאיזותכלית", 117 | "לאן", 118 | "לבין", 119 | "לה", 120 | "להיות", 121 | "להם", 122 | "להן", 123 | "לו", 124 | "לי", 125 | "לכם", 126 | "לכן", 127 | "למה", 128 | "למטה", 129 | "למעלה", 130 | "למקוםשבו", 131 | "למרות", 132 | "לנו", 133 | "לעבר", 134 | "לעיכן", 135 | "לפיכך", 136 | "לפני", 137 | "מאד", 138 | "מאחורי", 139 | "מאיזוסיבה", 140 | "מאין", 141 | "מאיפה", 142 | "מבלי", 143 | "מבעד", 144 | "מדוע", 145 | "מה", 146 | "מהיכן", 147 | "מול", 148 | "מחוץ", 149 | "מי", 150 | "מכאן", 151 | "מכיוון", 152 | "מלבד", 153 | "מן", 154 | "מנין", 155 | "מסוגל", 156 | "מעט", 157 | "מעטים", 158 | "מעל", 159 | "מצד", 160 | "מקוםבו", 161 | "מתחת", 162 | "מתי", 163 | "נגד", 164 | "נגר", 165 | "נו", 166 | "עד", 167 | "עז", 168 | "על", 169 | "עלי", 170 | "עליה", 171 | "עליהם", 172 | "עליהן", 173 | "עליו", 174 | "עליך", 175 | "עליכם", 176 | "עלינו", 177 | "עם", 178 | "עצמה", 179 | "עצמהם", 180 | "עצמהן", 181 | "עצמו", 182 | "עצמי", 183 | "עצמם", 184 | "עצמן", 185 | "עצמנו", 186 | "פה", 187 | "רק", 188 | "שוב", 189 | "של", 190 | "שלה", 191 | "שלהם", 192 | "שלהן", 193 | "שלו", 194 | "שלי", 195 | "שלך", 196 | "שלכה", 197 | "שלכם", 198 | "שלכן", 199 | "שלנו", 200 | "שם", 201 | "תהיה", 202 | "תחת", 203 | ]; 204 | -------------------------------------------------------------------------------- /src/stopwords/zul.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub static STOPWORDS_ZUL: &[&str] = &[ 8 | "futhi", 9 | "kahle", 10 | "kakhulu", 11 | "kanye", 12 | "khona", 13 | "kodwa", 14 | "kungani", 15 | "kusho", 16 | "la", 17 | "lakhe", 18 | "lapho", 19 | "mina", 20 | "ngesikhathi", 21 | "nje", 22 | "phansi", 23 | "phezulu", 24 | "u", 25 | "ukuba", 26 | "ukuthi", 27 | "ukuze", 28 | "uma", 29 | 
"wahamba", 30 | "wakhe", 31 | "wami", 32 | "wase", 33 | "wathi", 34 | "yakhe", 35 | "zakhe", 36 | "zonke", 37 | ]; 38 | -------------------------------------------------------------------------------- /src/store/generic.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use core::cmp::Eq; 8 | use core::hash::Hash; 9 | use hashbrown::HashMap; 10 | use std::fmt::Display; 11 | use std::sync::{Arc, RwLock}; 12 | use std::time::{Duration, SystemTime}; 13 | 14 | pub trait StoreGeneric { 15 | fn ref_last_used(&self) -> &RwLock; 16 | } 17 | 18 | pub trait StoreGenericPool< 19 | K: Hash + Eq + Copy + Display, 20 | S: StoreGeneric, 21 | B: StoreGenericBuilder, 22 | > 23 | { 24 | fn proceed_acquire_cache( 25 | kind: &str, 26 | collection_str: &str, 27 | pool_key: K, 28 | store: &Arc, 29 | ) -> Result, ()> { 30 | debug!( 31 | "{} store acquired from pool for collection: {} (pool key: {})", 32 | kind, collection_str, pool_key 33 | ); 34 | 35 | // Bump store last used date (avoids early janitor eviction) 36 | let mut last_used_value = store.ref_last_used().write().unwrap(); 37 | 38 | *last_used_value = SystemTime::now(); 39 | 40 | // Perform an early drop of the lock (frees up write lock early) 41 | drop(last_used_value); 42 | 43 | Ok(store.clone()) 44 | } 45 | 46 | fn proceed_acquire_open( 47 | kind: &str, 48 | collection_str: &str, 49 | pool_key: K, 50 | pool: &Arc>>>, 51 | ) -> Result, ()> { 52 | match B::build(pool_key) { 53 | Ok(store) => { 54 | // Acquire a thread-safe store pool reference in write mode 55 | let mut store_pool_write = pool.write().unwrap(); 56 | let store_box = Arc::new(store); 57 | 58 | store_pool_write.insert(pool_key, store_box.clone()); 59 | 60 | debug!( 61 | "opened and cached {} store in pool for collection: {} (pool key: {})", 62 | kind, 
collection_str, pool_key 63 | ); 64 | 65 | Ok(store_box) 66 | } 67 | Err(_) => { 68 | error!( 69 | "failed opening {} store for collection: {} (pool key: {})", 70 | kind, collection_str, pool_key 71 | ); 72 | 73 | Err(()) 74 | } 75 | } 76 | } 77 | 78 | fn proceed_janitor( 79 | kind: &str, 80 | pool: &Arc>>>, 81 | inactive_after: u64, 82 | access_lock: &Arc>, 83 | ) { 84 | debug!("scanning for {} store pool items to janitor", kind); 85 | 86 | // Acquire access lock (in blocking write mode), and reference it in context 87 | // Notice: this prevents store to be acquired from any context 88 | let _access = access_lock.write().unwrap(); 89 | 90 | let mut removal_register: Vec = Vec::new(); 91 | 92 | for (collection_bucket, store) in pool.read().unwrap().iter() { 93 | // Important: be lenient with system clock going back to a past duration, since \ 94 | // we may be running in a virtualized environment where clock is not guaranteed \ 95 | // to be monotonic. This is done to avoid poisoning associated mutexes by \ 96 | // crashing on unwrap(). 97 | let last_used_elapsed = store 98 | .ref_last_used() 99 | .read() 100 | .unwrap() 101 | .elapsed() 102 | .unwrap_or_else(|err| { 103 | error!( 104 | "store pool item: {} last used duration clock issue, zeroing: {}", 105 | collection_bucket, err 106 | ); 107 | 108 | // Assuming a zero seconds fallback duration 109 | Duration::from_secs(0) 110 | }) 111 | .as_secs(); 112 | 113 | if last_used_elapsed >= inactive_after { 114 | debug!( 115 | "found expired {} store pool item: {}; elapsed time: {}s", 116 | kind, collection_bucket, last_used_elapsed 117 | ); 118 | 119 | // Notice: the bucket value needs to be cloned, as we cannot reference as value \ 120 | // that will outlive referenced value once we remove it from its owner set. 
121 | removal_register.push(*collection_bucket); 122 | } else { 123 | debug!( 124 | "found non-expired {} store pool item: {}; elapsed time: {}s", 125 | kind, collection_bucket, last_used_elapsed 126 | ); 127 | } 128 | } 129 | 130 | if !removal_register.is_empty() { 131 | let mut store_pool_write = pool.write().unwrap(); 132 | 133 | for collection_bucket in &removal_register { 134 | store_pool_write.remove(collection_bucket); 135 | } 136 | } 137 | 138 | info!( 139 | "done scanning for {} store pool items to janitor, expired {} items, now has {} items", 140 | kind, 141 | removal_register.len(), 142 | pool.read().unwrap().len() 143 | ); 144 | } 145 | } 146 | 147 | pub trait StoreGenericBuilder { 148 | fn build(pool_key: K) -> Result; 149 | } 150 | 151 | pub trait StoreGenericActionBuilder { 152 | fn proceed_erase_collection(collection_str: &str) -> Result; 153 | 154 | fn proceed_erase_bucket(collection_str: &str, bucket_str: &str) -> Result; 155 | 156 | fn dispatch_erase<'a, T: Into<&'a str>>( 157 | kind: &str, 158 | collection: T, 159 | bucket: Option, 160 | ) -> Result { 161 | let collection_str = collection.into(); 162 | 163 | info!("{} erase requested on collection: {}", kind, collection_str); 164 | 165 | if let Some(bucket) = bucket { 166 | Self::proceed_erase_bucket(collection_str, bucket.into()) 167 | } else { 168 | Self::proceed_erase_collection(collection_str) 169 | } 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/store/identifiers.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use std::hash::Hasher; 8 | use twox_hash::XxHash32; 9 | 10 | pub type StoreObjectIID = u32; 11 | pub type StoreObjectOID<'a> = &'a str; 12 | pub type StoreTermHashed = u32; 13 | 14 | pub struct StoreTermHash; 15 
| 16 | pub enum StoreMetaKey { 17 | IIDIncr, 18 | } 19 | 20 | pub enum StoreMetaValue { 21 | IIDIncr(StoreObjectIID), 22 | } 23 | 24 | impl StoreMetaKey { 25 | pub fn as_u32(&self) -> u32 { 26 | match self { 27 | StoreMetaKey::IIDIncr => 0, 28 | } 29 | } 30 | } 31 | 32 | impl StoreTermHash { 33 | pub fn from(term: &str) -> StoreTermHashed { 34 | let mut hasher = XxHash32::with_seed(0); 35 | 36 | hasher.write(term.as_bytes()); 37 | 38 | hasher.finish() as u32 39 | } 40 | } 41 | 42 | #[cfg(test)] 43 | mod tests { 44 | use super::*; 45 | 46 | #[test] 47 | fn it_converts_meta_key_to_u32() { 48 | assert_eq!(StoreMetaKey::IIDIncr.as_u32(), 0); 49 | } 50 | 51 | #[test] 52 | fn it_hashes_term() { 53 | assert_eq!(StoreTermHash::from("hash:1"), 3637660813); 54 | assert_eq!(StoreTermHash::from("hash:2"), 3577985381); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/store/item.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub struct StoreItemBuilder; 8 | 9 | #[derive(PartialEq, Debug)] 10 | pub struct StoreItem<'a>( 11 | pub StoreItemPart<'a>, 12 | pub Option>, 13 | pub Option>, 14 | ); 15 | 16 | #[derive(Copy, Clone, PartialEq, Debug)] 17 | pub struct StoreItemPart<'a>(&'a str); 18 | 19 | // TODO: Change variant names 20 | #[allow(clippy::enum_variant_names)] 21 | #[derive(PartialEq, Debug)] 22 | pub enum StoreItemError { 23 | InvalidCollection, 24 | InvalidBucket, 25 | InvalidObject, 26 | } 27 | 28 | const STORE_ITEM_PART_LEN_MIN: usize = 0; 29 | const STORE_ITEM_PART_LEN_MAX: usize = 128; 30 | 31 | impl<'a> StoreItemPart<'a> { 32 | pub fn from_str(part: &'a str) -> Result { 33 | let len = part.len(); 34 | 35 | if len > STORE_ITEM_PART_LEN_MIN 36 | && len <= STORE_ITEM_PART_LEN_MAX 37 | && 
part.chars().all(|character| character.is_ascii()) 38 | { 39 | Ok(StoreItemPart(part)) 40 | } else { 41 | Err(()) 42 | } 43 | } 44 | 45 | pub fn as_str(&self) -> &'a str { 46 | self.0 47 | } 48 | } 49 | 50 | impl<'a> From> for &'a str { 51 | fn from(part: StoreItemPart<'a>) -> Self { 52 | part.as_str() 53 | } 54 | } 55 | 56 | impl StoreItemBuilder { 57 | pub fn from_depth_1(collection: &str) -> Result { 58 | // Validate & box collection 59 | if let Ok(collection_item) = StoreItemPart::from_str(collection) { 60 | Ok(StoreItem(collection_item, None, None)) 61 | } else { 62 | Err(StoreItemError::InvalidCollection) 63 | } 64 | } 65 | 66 | pub fn from_depth_2<'a>( 67 | collection: &'a str, 68 | bucket: &'a str, 69 | ) -> Result, StoreItemError> { 70 | // Validate & box collection + bucket 71 | match ( 72 | StoreItemPart::from_str(collection), 73 | StoreItemPart::from_str(bucket), 74 | ) { 75 | (Ok(collection_item), Ok(bucket_item)) => { 76 | Ok(StoreItem(collection_item, Some(bucket_item), None)) 77 | } 78 | (Err(_), _) => Err(StoreItemError::InvalidCollection), 79 | (_, Err(_)) => Err(StoreItemError::InvalidBucket), 80 | } 81 | } 82 | 83 | pub fn from_depth_3<'a>( 84 | collection: &'a str, 85 | bucket: &'a str, 86 | object: &'a str, 87 | ) -> Result, StoreItemError> { 88 | // Validate & box collection + bucket + object 89 | match ( 90 | StoreItemPart::from_str(collection), 91 | StoreItemPart::from_str(bucket), 92 | StoreItemPart::from_str(object), 93 | ) { 94 | (Ok(collection_item), Ok(bucket_item), Ok(object_item)) => Ok(StoreItem( 95 | collection_item, 96 | Some(bucket_item), 97 | Some(object_item), 98 | )), 99 | (Err(_), _, _) => Err(StoreItemError::InvalidCollection), 100 | (_, Err(_), _) => Err(StoreItemError::InvalidBucket), 101 | (_, _, Err(_)) => Err(StoreItemError::InvalidObject), 102 | } 103 | } 104 | } 105 | 106 | #[cfg(test)] 107 | mod tests { 108 | use super::*; 109 | 110 | #[test] 111 | fn it_builds_store_item_depth_1() { 112 | assert_eq!( 113 | 
StoreItemBuilder::from_depth_1("c:test:1"), 114 | Ok(StoreItem(StoreItemPart("c:test:1"), None, None)) 115 | ); 116 | assert_eq!( 117 | StoreItemBuilder::from_depth_1(""), 118 | Err(StoreItemError::InvalidCollection) 119 | ); 120 | } 121 | 122 | #[test] 123 | fn it_builds_store_item_depth_2() { 124 | assert_eq!( 125 | StoreItemBuilder::from_depth_2("c:test:2", "b:test:2"), 126 | Ok(StoreItem( 127 | StoreItemPart("c:test:2"), 128 | Some(StoreItemPart("b:test:2")), 129 | None 130 | )) 131 | ); 132 | assert_eq!( 133 | StoreItemBuilder::from_depth_2("", "b:test:2"), 134 | Err(StoreItemError::InvalidCollection) 135 | ); 136 | assert_eq!( 137 | StoreItemBuilder::from_depth_2("c:test:2", ""), 138 | Err(StoreItemError::InvalidBucket) 139 | ); 140 | } 141 | 142 | #[test] 143 | fn it_builds_store_item_depth_3() { 144 | assert_eq!( 145 | StoreItemBuilder::from_depth_3("c:test:3", "b:test:3", "o:test:3"), 146 | Ok(StoreItem( 147 | StoreItemPart("c:test:3"), 148 | Some(StoreItemPart("b:test:3")), 149 | Some(StoreItemPart("o:test:3")) 150 | )) 151 | ); 152 | assert_eq!( 153 | StoreItemBuilder::from_depth_3("", "b:test:3", "o:test:3"), 154 | Err(StoreItemError::InvalidCollection) 155 | ); 156 | assert_eq!( 157 | StoreItemBuilder::from_depth_3("c:test:3", "", "o:test:3"), 158 | Err(StoreItemError::InvalidBucket) 159 | ); 160 | assert_eq!( 161 | StoreItemBuilder::from_depth_3("c:test:3", "b:test:3", ""), 162 | Err(StoreItemError::InvalidObject) 163 | ); 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/store/macros.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | #[macro_export] 8 | macro_rules! 
io_error { 9 | ($error:expr) => { 10 | io::Error::new(io::ErrorKind::Other, $error) 11 | }; 12 | } 13 | -------------------------------------------------------------------------------- /src/store/mod.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | #[macro_use] 8 | mod macros; 9 | 10 | mod generic; 11 | mod keyer; 12 | 13 | pub mod fst; 14 | pub mod identifiers; 15 | pub mod item; 16 | pub mod kv; 17 | pub mod operation; 18 | -------------------------------------------------------------------------------- /src/store/operation.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use crate::executor::count::ExecutorCount; 8 | use crate::executor::flushb::ExecutorFlushB; 9 | use crate::executor::flushc::ExecutorFlushC; 10 | use crate::executor::flusho::ExecutorFlushO; 11 | use crate::executor::list::ExecutorList; 12 | use crate::executor::pop::ExecutorPop; 13 | use crate::executor::push::ExecutorPush; 14 | use crate::executor::search::ExecutorSearch; 15 | use crate::executor::suggest::ExecutorSuggest; 16 | use crate::query::actions::Query; 17 | 18 | pub struct StoreOperationDispatch; 19 | 20 | impl StoreOperationDispatch { 21 | pub fn dispatch(query: Query) -> Result, ()> { 22 | // Dispatch de-constructed query to its target executor 23 | match query { 24 | Query::Search(store, query_id, lexer, limit, offset) => { 25 | ExecutorSearch::execute(store, query_id, lexer, limit, offset) 26 | .map(|results| results.map(|results| results.join(" "))) 27 | } 28 | Query::Suggest(store, query_id, lexer, limit) => { 29 | ExecutorSuggest::execute(store, query_id, 
lexer, limit) 30 | .map(|results| results.map(|results| results.join(" "))) 31 | } 32 | Query::List(store, query_id, limit, offset) => { 33 | ExecutorList::execute(store, query_id, limit, offset) 34 | .map(|results| results.join(" ")) 35 | .map(|results| Some(results)) 36 | } 37 | Query::Push(store, lexer) => ExecutorPush::execute(store, lexer).map(|_| None), 38 | Query::Pop(store, lexer) => { 39 | ExecutorPop::execute(store, lexer).map(|count| Some(count.to_string())) 40 | } 41 | Query::Count(store) => { 42 | ExecutorCount::execute(store).map(|count| Some(count.to_string())) 43 | } 44 | Query::FlushC(store) => { 45 | ExecutorFlushC::execute(store).map(|count| Some(count.to_string())) 46 | } 47 | Query::FlushB(store) => { 48 | ExecutorFlushB::execute(store).map(|count| Some(count.to_string())) 49 | } 50 | Query::FlushO(store) => { 51 | ExecutorFlushO::execute(store).map(|count| Some(count.to_string())) 52 | } 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/tasker/mod.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | pub mod runtime; 8 | pub mod shutdown; 9 | -------------------------------------------------------------------------------- /src/tasker/runtime.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | use std::thread; 8 | use std::time::{Duration, Instant}; 9 | 10 | use crate::store::fst::StoreFSTPool; 11 | use crate::store::kv::StoreKVPool; 12 | 13 | pub struct TaskerBuilder; 14 | pub struct Tasker; 15 | 16 | const TASKER_TICK_INTERVAL: Duration = Duration::from_secs(10); 17 | 
18 | impl TaskerBuilder { 19 | pub fn build() -> Tasker { 20 | Tasker {} 21 | } 22 | } 23 | 24 | impl Tasker { 25 | pub fn run(&self) { 26 | info!("tasker is now active"); 27 | 28 | loop { 29 | // Hold for next aggregate run 30 | thread::sleep(TASKER_TICK_INTERVAL); 31 | 32 | debug!("running a tasker tick..."); 33 | 34 | let tick_start = Instant::now(); 35 | 36 | Self::tick(); 37 | 38 | let tick_took = tick_start.elapsed(); 39 | 40 | info!( 41 | "ran tasker tick (took {}s + {}ms)", 42 | tick_took.as_secs(), 43 | tick_took.subsec_millis() 44 | ); 45 | } 46 | } 47 | 48 | fn tick() { 49 | // Proceed all tick actions 50 | 51 | // #1: Janitors 52 | StoreKVPool::janitor(); 53 | StoreFSTPool::janitor(); 54 | 55 | // #2: Others 56 | StoreKVPool::flush(false); 57 | StoreFSTPool::consolidate(false); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/tasker/shutdown.rs: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Valerian Saliou 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | #[cfg(windows)] 8 | mod platform { 9 | // Notice: the following module is inspired from a fork of `graceful`, which implements \ 10 | // Windows support upon the original `graceful` crate; find the fork at: \ 11 | // https://github.com/Git0Shuai/graceful 12 | 13 | use std::sync::mpsc::{sync_channel, Receiver, SyncSender}; 14 | use std::sync::Mutex; 15 | 16 | use winapi::shared::minwindef::{BOOL, DWORD, TRUE}; 17 | use winapi::um::consoleapi::SetConsoleCtrlHandler; 18 | 19 | lazy_static! 
{ 20 | static ref CHANNEL: (SyncSender, Mutex>) = { 21 | let channel = sync_channel(0); 22 | 23 | (channel.0, Mutex::new(channel.1)) 24 | }; 25 | } 26 | 27 | unsafe extern "system" fn handler(event: DWORD) -> BOOL { 28 | CHANNEL.0.send(event).unwrap(); 29 | CHANNEL.0.send(0).unwrap(); 30 | 31 | TRUE 32 | } 33 | 34 | pub struct ShutdownSignal; 35 | 36 | impl ShutdownSignal { 37 | pub fn new() -> ShutdownSignal { 38 | unsafe { SetConsoleCtrlHandler(Some(handler), TRUE) }; 39 | 40 | ShutdownSignal 41 | } 42 | 43 | pub fn at_exit(&self, handler: F) { 44 | let event = { 45 | let receiver = CHANNEL.1.lock().unwrap(); 46 | 47 | receiver.recv().unwrap() 48 | }; 49 | 50 | handler(event as usize); 51 | 52 | CHANNEL.1.lock().unwrap().recv().unwrap(); 53 | } 54 | } 55 | } 56 | 57 | #[cfg(unix)] 58 | mod platform { 59 | // Notice: the following module is inspired from `graceful`, which can be found at: \ 60 | // https://github.com/0x1997/graceful 61 | 62 | use nix::sys::signal::{SigSet, SIGINT, SIGQUIT, SIGTERM}; 63 | 64 | pub struct ShutdownSignal(SigSet); 65 | 66 | impl ShutdownSignal { 67 | pub fn new() -> ShutdownSignal { 68 | let mut mask = SigSet::empty(); 69 | 70 | ShutdownSignal::init(&mut mask).unwrap(); 71 | ShutdownSignal(mask) 72 | } 73 | 74 | fn init(mask: &mut SigSet) -> nix::Result<()> { 75 | mask.add(SIGINT); 76 | mask.add(SIGQUIT); 77 | mask.add(SIGTERM); 78 | 79 | mask.thread_block() 80 | } 81 | 82 | pub fn at_exit(&self, handler: F) { 83 | let signal = self.0.wait().unwrap(); 84 | 85 | handler(signal as usize); 86 | } 87 | } 88 | } 89 | 90 | pub use platform::ShutdownSignal; 91 | -------------------------------------------------------------------------------- /tests/integration/.gitignore: -------------------------------------------------------------------------------- 1 | instance/data/ 2 | 3 | runner/node_modules/ 4 | -------------------------------------------------------------------------------- /tests/integration/instance/config.cfg: 
-------------------------------------------------------------------------------- 1 | # Sonic 2 | # Configuration file (integration tests) 3 | 4 | [server] 5 | 6 | log_level = "warn" 7 | 8 | [channel] 9 | 10 | inet = "127.0.0.1:1491" 11 | auth_password = "password:test" 12 | 13 | [channel.search] 14 | 15 | [store] 16 | 17 | [store.kv] 18 | [store.kv.pool] 19 | [store.kv.database] 20 | 21 | [store.fst] 22 | [store.fst.pool] 23 | [store.fst.graph] 24 | -------------------------------------------------------------------------------- /tests/integration/runner/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sonic-tests-integration", 3 | "version": "1.0.0", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "sonic-channel": { 8 | "version": "1.2.5", 9 | "resolved": "https://registry.npmjs.org/sonic-channel/-/sonic-channel-1.2.5.tgz", 10 | "integrity": "sha512-tgyV+l98yT4jSOFztj1BKn4DammFS7Cdaaxn9EnCMYHJ1N1sD+IQDsyrA9lKaJV1gRQRHNPc9HAsRQP2L/BR1g==" 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /tests/integration/runner/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sonic-tests-integration", 3 | "description": "Sonic integration tests", 4 | "version": "1.0.0", 5 | "main": "runner.js", 6 | "homepage": "https://github.com/valeriansaliou/sonic", 7 | "license": "ISC", 8 | "engineStrict": true, 9 | "engines": { 10 | "node": ">=10.0.0", 11 | "npm": ">=6.0.0" 12 | }, 13 | "scripts": { 14 | "test": "echo \"Error: no test specified\" && exit 1" 15 | }, 16 | "author": { 17 | "name": "Nikita Vilunov", 18 | "email": "nikitaoryol@gmail.com" 19 | }, 20 | "dependencies": { 21 | "sonic-channel": "^1.2.5" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /tests/integration/runner/runner.js: 
-------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Nikita Vilunov 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | const SonicChannel = require("sonic-channel"); 8 | 9 | function connect(channel, name) { 10 | return new Promise((resolve, reject) => { 11 | channel.connect({ 12 | connected() { 13 | console.info( 14 | `=== Sonic Channel succeeded to connect to host (${name}) ===` 15 | ); 16 | 17 | resolve(channel); 18 | }, 19 | 20 | disconnected() { 21 | console.error(`=== Sonic Channel is now disconnected (${name}) ===`); 22 | }, 23 | 24 | timeout() { 25 | console.error(`=== Sonic Channel connection timed out (${name}) ===`); 26 | }, 27 | 28 | retrying() { 29 | console.error(`=== Trying to reconnect to Sonic Channel (${name}) ===`); 30 | }, 31 | 32 | error(error) { 33 | console.error( 34 | `=== Sonic Channel failed to connect to host (${name}) ===`, error 35 | ); 36 | 37 | reject(error); 38 | } 39 | }); 40 | }); 41 | } 42 | 43 | async function main(scenario) { 44 | let parameters = { 45 | host : "localhost", 46 | port : 1491, 47 | auth : "password:test" 48 | }; 49 | 50 | // Connect to Sonic Channel 51 | let search = new SonicChannel.Search(parameters); 52 | let ingest = new SonicChannel.Ingest(parameters); 53 | 54 | await Promise.all([ 55 | connect(search, "search"), 56 | connect(ingest, "ingest") 57 | ]); 58 | 59 | // Run scenario 60 | await scenario(search, ingest); 61 | 62 | // Close Sonic Channel 63 | await Promise.all([ 64 | search.close(), 65 | ingest.close() 66 | ]); 67 | } 68 | 69 | function wrapper(name, scenario, timeout) { 70 | console.log(`=== Running test scenario ${name} ===`) 71 | 72 | timeout = (timeout || 1000); 73 | 74 | let timer = new Promise((_, reject) => { 75 | setTimeout(() => { 76 | reject("Timeout reached"); 77 | }, timeout); 78 | }); 79 | 80 | let start = process.hrtime(); 81 | 82 | Promise.race([ 
83 | main(scenario), timer 84 | ]) 85 | .then( 86 | () => { 87 | let end = process.hrtime(start); 88 | 89 | console.log( 90 | `=== Test scenario ${name} succedeed, execution time: ` + 91 | `${end[0] + end[1] / 1e9} s ===` 92 | ); 93 | }, 94 | 95 | (error) => { 96 | let end = process.hrtime(start); 97 | 98 | console.error( 99 | (`=== Test scenario ${name} failed, execution time: ` + 100 | `${end[0] + end[1] / 1e9} s ===`), 101 | `\nERROR >> ${error}` 102 | ); 103 | 104 | process.exit(-1); 105 | } 106 | ); 107 | } 108 | 109 | module.exports = wrapper; 110 | -------------------------------------------------------------------------------- /tests/integration/scenarios/insert.js: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Nikita Vilunov 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | const expected_documents = { 8 | "conversation:1" : ( 9 | "Batch normalization is a technique for improving the speed, " + 10 | "performance, and stability of artificial neural networks" 11 | ), 12 | 13 | "conversation:2" : ( 14 | "This scratch technique is much like the transform in some ways" 15 | ) 16 | }; 17 | 18 | const unexpected_documents = { 19 | "conversation:3" : "Glissando is a glide from one pitch to another" 20 | } 21 | 22 | async function run(search, ingest) { 23 | // Ingest documents 24 | for (const key in expected_documents) { 25 | await ingest.push("messages", "default", key, expected_documents[key]); 26 | } 27 | 28 | for (const key in unexpected_documents) { 29 | await ingest.push("messages", "default", key, unexpected_documents[key]); 30 | } 31 | 32 | // Perform search on ingested documents 33 | let response = await search.query("messages", "default", "technique"); 34 | 35 | for (const key in expected_documents) { 36 | if (!response.includes(key) === true) { 37 | throw `Expected document ${key} was not found`; 38 | } 39 | 
} 40 | 41 | for (const key in unexpected_documents) { 42 | if (response.includes(key) === true) { 43 | throw `Unexpected document ${key} was returned`; 44 | } 45 | } 46 | } 47 | 48 | require("../runner/runner.js")( 49 | "Insert & Search", run 50 | ); 51 | -------------------------------------------------------------------------------- /tests/integration/scenarios/ping.js: -------------------------------------------------------------------------------- 1 | // Sonic 2 | // 3 | // Fast, lightweight and schema-less search backend 4 | // Copyright: 2019, Nikita Vilunov 5 | // License: Mozilla Public License v2.0 (MPL v2.0) 6 | 7 | async function run(search) { 8 | // Perform a ping 9 | await search.ping(); 10 | } 11 | 12 | require("../runner/runner.js")( 13 | "Ping", run 14 | ); 15 | -------------------------------------------------------------------------------- /tests/integration/scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## 4 | # Sonic 5 | # Fast, lightweight and schema-less search backend 6 | # 7 | # Copyright: 2019, Nikita Vilunov , \ 8 | # 2019, Valerian Saliou 9 | # License: Mozilla Public License v2.0 (MPL v2.0) 10 | ## 11 | 12 | ABSPATH=$(cd "$(dirname "$0")"; pwd) 13 | TESTSPATH="$ABSPATH/../" 14 | 15 | STATUS=0 16 | 17 | # Run tests 18 | pushd "$TESTSPATH" > /dev/null 19 | # Install test dependencies from a clean state 20 | pushd "./runner/" > /dev/null 21 | npm ci 22 | popd 23 | 24 | # Run each test scenario 25 | for scenario in $(find ./scenarios/ -name "*.js") 26 | do 27 | [[ -d ./instance/data/ ]] && rm -r ./instance/data/ 28 | 29 | # Run sonic from a clean state 30 | pushd "./instance/" > /dev/null 31 | cargo run -- --config config.cfg & 32 | SONIC_PID=$! 33 | sleep 2 34 | popd 35 | 36 | # Run scenario 37 | node $scenario 38 | 39 | [[ $? 
-eq 0 ]] || STATUS=1 40 | 41 | # Stop Sonic 42 | kill $SONIC_PID 43 | wait $SONIC_PID 44 | done 45 | 46 | [[ -d ./instance/data/ ]] && rm -r ./instance/data/ 47 | popd 48 | 49 | exit $STATUS 50 | --------------------------------------------------------------------------------