├── rustfmt.toml ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── dependabot.yml └── workflows │ └── rust.yml ├── config ├── logging.toml ├── config-bench.toml ├── config.toml └── config-rpc.toml ├── codecov.yml ├── toshi-proto ├── src │ └── lib.rs ├── build.rs ├── Cargo.toml └── proto │ ├── cluster.proto │ └── eraftpb.proto ├── SECURITY.md ├── doc.json ├── toshi-server ├── src │ ├── handlers │ │ ├── mod.rs │ │ ├── list.rs │ │ ├── root.rs │ │ ├── summary.rs │ │ ├── index.rs │ │ ├── bulk.rs │ │ └── search.rs │ ├── shutdown.rs │ ├── utils.rs │ ├── lib.rs │ ├── bin │ │ └── toshi.rs │ ├── router.rs │ ├── commit.rs │ ├── index.rs │ ├── settings.rs │ └── handle.rs ├── tests │ └── lib.rs └── Cargo.toml ├── toshi-types ├── src │ ├── extra_errors.rs │ ├── query │ │ ├── facet.rs │ │ ├── term.rs │ │ ├── regex.rs │ │ ├── fuzzy.rs │ │ ├── phrase.rs │ │ ├── range.rs │ │ ├── mod.rs │ │ └── boolean.rs │ ├── server.rs │ ├── error.rs │ ├── client.rs │ └── lib.rs └── Cargo.toml ├── Cargo.toml ├── toshi-client ├── examples │ ├── sync_search.rs │ ├── exact_query.rs │ ├── range_query.rs │ ├── hyper_example.rs │ └── bool_query.rs ├── src │ ├── error.rs │ ├── lib.rs │ ├── hyper_client.rs │ └── isahc_client.rs └── Cargo.toml ├── ci ├── azure-run-kcov.yml ├── azure-install-deps.yml ├── azure-install-rust.yml └── coverage.sh ├── toshi-raft ├── Cargo.toml └── src │ ├── proposal.rs │ ├── rpc_utils.rs │ ├── raft_io.rs │ ├── handle.rs │ ├── rpc_server.rs │ └── lib.rs ├── LICENSE ├── schema.json ├── docs └── api.raml ├── .gitignore ├── requests.http └── README.md /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 140 2 | reorder_imports = true 3 | newline_style = "Unix" 4 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding 
model platforms 2 | 3 | github: hntd187 4 | -------------------------------------------------------------------------------- /config/logging.toml: -------------------------------------------------------------------------------- 1 | type = "terminal" 2 | level = "info" 3 | timezone = "utc" 4 | format = "full" -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: off 2 | coverage: 3 | status: 4 | patch: 5 | default: 6 | informational: true 7 | project: 8 | default: 9 | informational: true -------------------------------------------------------------------------------- /toshi-proto/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod cluster_rpc { 2 | tonic::include_proto!("clusterrpc"); 3 | 4 | pub use index_service_client as client; 5 | pub use index_service_server as server; 6 | } 7 | -------------------------------------------------------------------------------- /toshi-proto/build.rs: -------------------------------------------------------------------------------- 1 | fn main() -> std::io::Result<()> { 2 | tonic_build::configure() 3 | .build_client(true) 4 | .build_server(true) 5 | .extern_path(".eraftpb.Message", "raft::eraftpb::Message") 6 | .compile(&["proto/cluster.proto"], &["proto/"]) 7 | } 8 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Report security issues for... 
6 | 7 | | Version | Supported | 8 | | ------- | ------------------ | 9 | | master | :white_check_mark: | 10 | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Email me shcarman AT gmail DOT com 15 | -------------------------------------------------------------------------------- /doc.json: -------------------------------------------------------------------------------- 1 | { 2 | "options": { "commit": true }, 3 | "document": { 4 | "song": "he-still-loves-me-f-choir-from-fighting-temptations", 5 | "year": 2007, 6 | "artist": "beyonce-knowles", 7 | "genre": "Pop", 8 | "lyrics": "Took me a while but I'm finally here", 9 | "idx": 23 10 | } 11 | } -------------------------------------------------------------------------------- /toshi-server/src/handlers/mod.rs: -------------------------------------------------------------------------------- 1 | use hyper::Body; 2 | 3 | pub use {bulk::*, index::*, list::*, root::*, search::*, summary::*}; 4 | 5 | pub mod bulk; 6 | pub mod index; 7 | pub mod list; 8 | pub mod root; 9 | pub mod search; 10 | pub mod summary; 11 | 12 | pub type ResponseFuture = Result, hyper::Error>; 13 | -------------------------------------------------------------------------------- /toshi-types/src/extra_errors.rs: -------------------------------------------------------------------------------- 1 | use crate::error::{Error, ErrorResponse}; 2 | 3 | impl From for http::Response { 4 | fn from(err: Error) -> Self { 5 | let body = ErrorResponse::new(err); 6 | let bytes = serde_json::to_vec(&body).unwrap(); 7 | http::Response::new(hyper::Body::from(bytes)) 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "toshi-server", 4 | # "toshi-proto", 5 | "toshi-client", 6 | "toshi-types", 7 | # "toshi-raft" 8 | ] 9 | 10 | [profile.dev.package."*"] 11 | opt-level = 2 12 | 13 | [profile.release] 
14 | opt-level = 3 15 | debug = false 16 | debug-assertions = false 17 | lto = true 18 | rpath = false 19 | codegen-units = 1 20 | -------------------------------------------------------------------------------- /config/config-bench.toml: -------------------------------------------------------------------------------- 1 | host = "localhost" 2 | port = 8080 3 | path = "data/" 4 | writer_memory = 8000000000 5 | log_level = "info" 6 | json_parsing_threads = 12 7 | bulk_buffer_size = 0 8 | auto_commit_duration = 10 9 | enable_clustering = false 10 | 11 | [merge_policy] 12 | kind = "log" 13 | min_merge_size = 8 14 | min_layer_size = 10_000 15 | level_log_size = 0.75 16 | -------------------------------------------------------------------------------- /config/config.toml: -------------------------------------------------------------------------------- 1 | host = "127.0.0.1" 2 | port = 8080 3 | path = "data" 4 | writer_memory = 200000000 5 | log_level = "info" 6 | json_parsing_threads = 1 7 | bulk_buffer_size = 10000 8 | auto_commit_duration = 10 9 | experimental = true 10 | 11 | [experimental_features] 12 | leader = true 13 | rpc_port = 8081 14 | id = 1 15 | nodes = [] 16 | 17 | [merge_policy] 18 | kind = "log" 19 | min_merge_size = 8 20 | min_layer_size = 10_000 21 | level_log_size = 0.75 22 | -------------------------------------------------------------------------------- /config/config-rpc.toml: -------------------------------------------------------------------------------- 1 | host = "127.0.0.1" 2 | port = 8082 3 | path = "data/" 4 | writer_memory = 200000000 5 | log_level = "info" 6 | json_parsing_threads = 4 7 | bulk_buffer_size = 10000 8 | auto_commit_duration = 10 9 | experimental = true 10 | 11 | [experimental_features] 12 | leader = false 13 | rpc_port = 8083 14 | id = 2 15 | nodes = ["127.0.0.1:8081"] 16 | 17 | [merge_policy] 18 | kind = "log" 19 | min_merge_size = 8 20 | min_layer_size = 10_000 21 | level_log_size = 0.75 22 | 
-------------------------------------------------------------------------------- /toshi-proto/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "toshi-proto" 3 | version = "0.1.0" 4 | authors = ["Stephen Carman "] 5 | description = "A full text search engine based on Tantivy" 6 | repository = "https://github.com/toshi-search/Toshi" 7 | license = "MIT" 8 | edition = "2021" 9 | build = "build.rs" 10 | 11 | [build-dependencies] 12 | tonic-build = "^0.6" 13 | 14 | [dependencies] 15 | tonic = "^0.6" 16 | bytes = "^1" 17 | prost = "^0.9" 18 | prost-types = "^0.9" 19 | prost-derive = "^0.9" 20 | -------------------------------------------------------------------------------- /toshi-client/examples/sync_search.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use serde::Deserialize; 3 | 4 | use toshi::*; 5 | 6 | #[derive(Clone, Deserialize)] 7 | pub struct Wiki { 8 | title: String, 9 | url: String, 10 | text: String, 11 | rating: i32, 12 | } 13 | 14 | pub fn main() -> Result<()> { 15 | let c = ToshiClient::new("http://localhost:8080"); 16 | let query = Query::Exact(ExactTerm::with_term("body", "born")); 17 | let search = Search::from_query(query); 18 | let _docs: SearchResults = c.sync_search("wiki", search)?; 19 | 20 | Ok(()) 21 | } 22 | -------------------------------------------------------------------------------- /toshi-client/examples/exact_query.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use serde::Deserialize; 4 | use toshi::*; 5 | 6 | #[derive(Clone, Deserialize)] 7 | pub struct Wiki { 8 | title: String, 9 | url: String, 10 | text: String, 11 | rating: i32, 12 | } 13 | 14 | #[tokio::main] 15 | pub async fn main() -> Result<()> { 16 | let c = ToshiClient::new("http://localhost:8080"); 17 | let query = Query::Exact(ExactTerm::with_term("body", "born")); 18 | let 
search = Search::from_query(query); 19 | let _docs: SearchResults = c.search("wiki", search).await?; 20 | 21 | Ok(()) 22 | } 23 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Does another search engine have this functionality? Can you describe it's function?** 14 | 15 | **Do you have a specific use case you are trying to solve?** 16 | 17 | **Additional context** 18 | Add any other context or screenshots about the feature request here. 19 | -------------------------------------------------------------------------------- /toshi-client/examples/range_query.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use serde::Deserialize; 4 | use toshi::*; 5 | 6 | #[derive(Clone, Deserialize)] 7 | pub struct Wiki { 8 | title: String, 9 | url: String, 10 | text: String, 11 | rating: i32, 12 | } 13 | 14 | #[tokio::main] 15 | pub async fn main() -> Result<()> { 16 | let client = ToshiClient::new("http://localhost:8080"); 17 | let query = RangeQuery::builder().gte(3).lte(5).for_field("rating").build(); 18 | 19 | let search = Search::from_query(query); 20 | let _results: SearchResults = client.search("wiki", search).await?; 21 | 22 | Ok(()) 23 | } 24 | -------------------------------------------------------------------------------- /toshi-client/src/error.rs: -------------------------------------------------------------------------------- 1 | use http::uri::InvalidUri; 2 | use thiserror::Error; 3 | 4 | #[derive(Error, Debug)] 5 | pub enum 
ToshiClientError { 6 | #[error("Serde deserialization error: {0}")] 7 | JsonError(#[from] serde_json::Error), 8 | 9 | #[cfg(feature = "isahc")] 10 | #[error("Isahc error: {0}")] 11 | IsahcError(#[from] isahc::Error), 12 | 13 | #[cfg(feature = "hyper")] 14 | #[error("Hyper error: {0}")] 15 | HyperError(#[from] hyper::Error), 16 | 17 | #[error("Http Error: {0}")] 18 | HttpError(#[from] http::Error), 19 | 20 | #[error("IO Error: {0}")] 21 | UriError(#[from] InvalidUri), 22 | } 23 | -------------------------------------------------------------------------------- /ci/azure-run-kcov.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - script: | 3 | cargo coverage --exclude-pattern '/toshi/src/bin,/src/bin,/bin,/target,/toshi/src/query/aggregate,/src/query/aggregate,/src/shutdown.rs,/src/support.rs' 4 | displayName: Run Cargo Coverage 5 | condition: eq( variables['Agent.OS'], 'Linux' ) 6 | - script: | 7 | bash <(curl -s https://codecov.io/bash) -s target/kcov -t $(CODECOV_TOKEN) 8 | displayName: Upload CodeCov 9 | # - script: | 10 | # cargo coveralls --exclude-pattern '/toshi/src/bin,/src/bin,/bin,/target,/toshi/src/query/aggregate,/src/query/aggregate,/src/shutdown.rs,/src/support.rs' 11 | # displayName: Run KCov 12 | condition: eq( variables['Agent.OS'], 'Linux' ) -------------------------------------------------------------------------------- /toshi-client/examples/hyper_example.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use serde::Deserialize; 4 | 5 | use toshi::{AsyncClient, HyperToshi}; 6 | use toshi_types::{ExactTerm, Query, Search, SearchResults}; 7 | 8 | #[derive(Clone, Deserialize)] 9 | pub struct Wiki { 10 | title: String, 11 | url: String, 12 | text: String, 13 | rating: i32, 14 | } 15 | 16 | #[tokio::main] 17 | pub async fn main() -> toshi::Result<()> { 18 | let c = HyperToshi::new("http://localhost:8080"); 19 | let query = 
Query::Exact(ExactTerm::with_term("body", "born")); 20 | let search = Search::from_query(query); 21 | let _docs: SearchResults = c.search("wiki", search).await?; 22 | 23 | Ok(()) 24 | } 25 | -------------------------------------------------------------------------------- /ci/azure-install-deps.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - template: azure-install-rust.yml 3 | parameters: 4 | rust_version: stable 5 | - script: | 6 | rustup component add rustfmt 7 | displayName: Install rustfmt 8 | - script: | 9 | sudo apt update 10 | sudo apt install zlib1g-dev libcurl4-openssl-dev libiberty-dev libdw-dev 11 | displayName: Install Packages 12 | condition: eq( variables['Agent.OS'], 'Linux' ) 13 | 14 | - script: | 15 | cargo install cargo-update || echo "cargo-update already installed" 16 | cargo install cargo-travis || echo "cargo-travis already installed" 17 | cargo install-update -a 18 | displayName: Install cargo-update and cargo-travis -------------------------------------------------------------------------------- /toshi-client/examples/bool_query.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use serde::Deserialize; 4 | 5 | use toshi::*; 6 | 7 | #[derive(Clone, Deserialize)] 8 | pub struct Wiki { 9 | title: String, 10 | url: String, 11 | text: String, 12 | rating: i32, 13 | } 14 | 15 | #[tokio::main] 16 | pub async fn main() -> Result<()> { 17 | let client = HyperToshi::new("http://localhost:8080"); 18 | let fuzzy_query = FuzzyQuery::builder().for_field("text").with_value("bears").with_distance(2).build(); 19 | let query = BoolQuery::builder().must_match(fuzzy_query).build(); 20 | 21 | let search = Search::from_query(query); 22 | let _results: SearchResults = client.search("wiki", search).await?; 23 | 24 | Ok(()) 25 | } 26 | -------------------------------------------------------------------------------- 
/toshi-server/src/handlers/list.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use toshi_types::Catalog; 4 | 5 | use crate::handlers::ResponseFuture; 6 | use crate::utils::with_body; 7 | 8 | pub async fn list_indexes(catalog: Arc) -> ResponseFuture { 9 | Ok(with_body(catalog.list_indexes().await)) 10 | } 11 | 12 | #[cfg(test)] 13 | mod tests { 14 | use super::*; 15 | use crate::commit::tests::read_body; 16 | use crate::index::create_test_catalog; 17 | 18 | #[tokio::test] 19 | async fn test_list() -> Result<(), Box> { 20 | let catalog = create_test_catalog("test_index"); 21 | let req = list_indexes(catalog).await?; 22 | let body = read_body(req).await?; 23 | assert_eq!(body, "[\"test_index\"]"); 24 | Ok(()) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /toshi-server/src/handlers/root.rs: -------------------------------------------------------------------------------- 1 | use hyper::{Body, Response}; 2 | 3 | use crate::handlers::ResponseFuture; 4 | 5 | pub fn toshi_info() -> String { 6 | format!("{{\"name\":\"Toshi Search\",\"version\":\"{}\"}}", "0.0.1") 7 | } 8 | 9 | pub async fn root() -> ResponseFuture { 10 | Ok(Response::builder() 11 | .header(hyper::header::CONTENT_TYPE, "application/json") 12 | .body(Body::from(toshi_info())) 13 | .unwrap()) 14 | } 15 | 16 | #[cfg(test)] 17 | mod tests { 18 | use super::*; 19 | use crate::commit::tests::read_body; 20 | 21 | #[tokio::test] 22 | async fn test_root() -> Result<(), Box> { 23 | let req: Response = root().await?; 24 | let body = read_body(req).await?; 25 | assert_eq!(body, toshi_info()); 26 | Ok(()) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /toshi-types/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "toshi-types" 3 | version = "0.1.1" 4 | authors = ["Stephen Carman "] 
5 | description = "The types for a full text search engine based on Tantivy" 6 | repository = "https://github.com/toshi-search/Toshi" 7 | license = "MIT" 8 | edition = "2021" 9 | resolver = "2" 10 | 11 | [dependencies] 12 | http = "^0.2" 13 | thiserror = "^1.0" 14 | anyhow = "^1.0" 15 | serde = "^1.0" 16 | serde_json = "^1.0" 17 | tantivy = "^0.19" 18 | async-trait = "^0.1" 19 | dashmap = { version = "^5", features = ["serde"] } 20 | slog = "^2.7" 21 | tokio = { version = "^1.13", features = ["sync"] } 22 | toml = "^0.5" 23 | 24 | [dependencies.hyper] 25 | version = "^0.14" 26 | optional = true 27 | 28 | [features] 29 | default = ["extra-errors"] 30 | extra-errors = ["hyper"] 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Rust Version: 29 | - Version [e.g. 22] 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 
33 | -------------------------------------------------------------------------------- /toshi-raft/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "toshi-raft" 3 | version = "0.1.0" 4 | authors = ["Stephen Carman "] 5 | description = "A full text search engine based on Tantivy" 6 | repository = "https://github.com/toshi-search/Toshi" 7 | license = "MIT" 8 | edition = "2018" 9 | 10 | [dependencies] 11 | toshi-proto = { path = "../toshi-proto" } 12 | toshi-types = { path = "../toshi-types" } 13 | anyhow = "^1.0" 14 | bytes = "^1.0.1" 15 | futures = "^0.3" 16 | async-trait = "^0.1" 17 | http = "^0.2" 18 | dashmap = "^4" 19 | tonic = "^0.4" 20 | prost = "^0.8" 21 | tantivy = "^0.15" 22 | slog = "^2.7" 23 | serde_json = "^1.0" 24 | tokio = { version = "^1", features = ["full", "parking_lot"] } 25 | serde = { version = "^1.0", features = ["derive"] } 26 | bincode = "^1.3" 27 | raft = { git = "https://github.com/tikv/raft-rs", default-features = false, features = ["prost-codec"] } 28 | message-io = "^0.14" 29 | 30 | [dev-dependencies] 31 | remove_dir_all = "^0.7" 32 | toshi-server = { path = "../toshi-server" } 33 | -------------------------------------------------------------------------------- /ci/azure-install-rust.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | # Linux and macOS. 3 | - script: | 4 | set -e 5 | curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $RUSTUP_TOOLCHAIN 6 | echo "##vso[task.setvariable variable=PATH;]$PATH:$HOME/.cargo/bin" 7 | env: 8 | RUSTUP_TOOLCHAIN: ${{parameters.rust_version}} 9 | displayName: "Install rust (*nix)" 10 | condition: not(eq(variables['Agent.OS'], 'Windows_NT')) 11 | # Windows. 
12 | - script: | 13 | curl -sSf -o rustup-init.exe https://win.rustup.rs 14 | rustup-init.exe -y --default-toolchain %RUSTUP_TOOLCHAIN% 15 | set PATH=%PATH%;%USERPROFILE%\.cargo\bin 16 | echo "##vso[task.setvariable variable=PATH;]%PATH%;%USERPROFILE%\.cargo\bin" 17 | env: 18 | RUSTUP_TOOLCHAIN: ${{parameters.rust_version}} 19 | displayName: "Install rust (Windows)" 20 | condition: eq(variables['Agent.OS'], 'Windows_NT') 21 | # All platforms. 22 | - script: | 23 | rustc -Vv 24 | cargo -V 25 | displayName: Query rust and cargo versions -------------------------------------------------------------------------------- /toshi-server/tests/lib.rs: -------------------------------------------------------------------------------- 1 | use std::net::SocketAddr; 2 | use std::sync::atomic::AtomicBool; 3 | use std::sync::Arc; 4 | 5 | use hyper::body::to_bytes; 6 | 7 | use toshi::{AsyncClient, HyperToshi}; 8 | 9 | use toshi_server::index::IndexCatalog; 10 | use toshi_server::router::Router; 11 | use toshi_server::settings::Settings; 12 | 13 | type BoxErr = Box; 14 | 15 | #[tokio::test] 16 | async fn test_client() -> Result<(), BoxErr> { 17 | let addr = "127.0.0.1:8080".parse::()?; 18 | let settings = Settings { 19 | path: "..\\data".into(), 20 | ..Default::default() 21 | }; 22 | 23 | let catalog = IndexCatalog::new(settings)?; 24 | let router = Router::new(Arc::new(catalog), Arc::new(AtomicBool::new(false))); 25 | 26 | tokio::spawn(router.router_with_catalog(addr)); 27 | 28 | let client = HyperToshi::new("http://localhost:8080"); 29 | let index = client.index().await?; 30 | let body = to_bytes(index.into_body()).await?; 31 | dbg!(body); 32 | Ok(()) 33 | } 34 | -------------------------------------------------------------------------------- /toshi-raft/src/proposal.rs: -------------------------------------------------------------------------------- 1 | use raft::prelude::*; 2 | use tokio::sync::oneshot::{channel, Receiver, Sender}; 3 | 4 | #[derive(Debug)] 5 | pub struct Proposal 
{ 6 | pub normal: Option>, 7 | pub conf_change: Option, 8 | pub transfer_leader: Option, 9 | pub proposed: u64, 10 | pub propose_success: Sender, 11 | } 12 | 13 | impl Proposal { 14 | pub fn new(entry: Vec) -> (Self, Receiver) { 15 | let (snd, rcv) = channel(); 16 | let prop = Self { 17 | normal: Some(entry), 18 | conf_change: None, 19 | transfer_leader: None, 20 | proposed: 0, 21 | propose_success: snd, 22 | }; 23 | (prop, rcv) 24 | } 25 | 26 | pub fn conf_change(conf: &ConfChange) -> (Self, Receiver) { 27 | let (snd, rcv) = channel(); 28 | let prop = Self { 29 | normal: None, 30 | conf_change: Some(conf.clone()), 31 | transfer_leader: None, 32 | proposed: 0, 33 | propose_success: snd, 34 | }; 35 | (prop, rcv) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Stephen Carman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /toshi-types/src/query/facet.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | 3 | use crate::query::KeyValue; 4 | 5 | /// A faceted query, see Tantivy's docs for more information [`tantivy::collector::FacetCollector`] 6 | /// It's also of note that this is the only query that does not implement [`crate::CreateQuery`] this 7 | /// is because facets are collected via a different interface in Tantivy, not via the query API 8 | #[derive(Serialize, Deserialize, Debug, Clone)] 9 | pub struct FacetQuery(KeyValue>); 10 | 11 | impl FacetQuery { 12 | /// Constructor to create a new facet query from a known key value 13 | pub fn new(facets: KeyValue>) -> Self { 14 | Self(facets) 15 | } 16 | 17 | /// Constructor to create the key value for the user 18 | pub fn with_terms(field: String, terms: Vec) -> Self { 19 | Self(KeyValue::new(field, terms)) 20 | } 21 | 22 | /// Return a query's values 23 | pub fn get_facets_values(&self) -> &[String] { 24 | &self.0.value 25 | } 26 | 27 | /// Return the query's fields 28 | pub fn get_facets_fields(&self) -> &str { 29 | &self.0.field 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /ci/coverage.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eu 3 | CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 4 | cd $CUR_DIR/../ 5 | COVERAGE_DIR="$CUR_DIR/../coverage" 6 | OUTPUT=${1:-Html} 7 | 8 | ## Borrowed from https://github.com/appaquet/exocore/blob/master/utils/coverage.sh 9 | ## See 
https://github.com/mozilla/grcov#grcov-with-travis 10 | ## 11 | 12 | if [[ -d $CUR_DIR/../target ]]; then 13 | find $CUR_DIR/../target -name "*.gc*" -delete 14 | fi 15 | 16 | export CARGO_INCREMENTAL=0 17 | export RUSTFLAGS="-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off" 18 | cargo +nightly clean 19 | cargo +nightly build --all --all-features 20 | cargo +nightly test --all --all-features 21 | 22 | mkdir -p $COVERAGE_DIR 23 | zip -0 $COVERAGE_DIR/ccov.zip `find . \( -name "*toshi*.gc*" \) -print`; 24 | 25 | grcov $COVERAGE_DIR/ccov.zip -s . -t lcov --llvm -o $COVERAGE_DIR/lcov.info --ignore-not-existing --ignore "/*" 26 | 27 | 28 | if [[ "$OUTPUT" == "Html" ]]; then 29 | genhtml -o $COVERAGE_DIR/ --show-details --highlight --ignore-errors source --legend "$COVERAGE_DIR"/lcov.info 30 | else 31 | bash <(curl -s https://codecov.io/bash) -f $COVERAGE_DIR/lcov.info; 32 | fi 33 | -------------------------------------------------------------------------------- /toshi-types/src/query/term.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | use tantivy::query::{Query, TermQuery}; 5 | use tantivy::schema::{IndexRecordOption, Schema}; 6 | 7 | use crate::query::*; 8 | use crate::Result; 9 | 10 | /// An exact term to search for 11 | #[derive(Serialize, Deserialize, Debug, Clone)] 12 | pub struct ExactTerm { 13 | term: KeyValue, 14 | } 15 | 16 | impl ExactTerm { 17 | /// Constructor with a known KeyValue 18 | pub fn new(term: KeyValue) -> Self { 19 | Self { term } 20 | } 21 | 22 | /// Constructor to create the key value for the user 23 | pub fn with_term(field: K, value: V) -> Self 24 | where 25 | K: fmt::Display, 26 | V: fmt::Display, 27 | { 28 | Self { 29 | term: KeyValue::new(field.to_string(), value.to_string()), 30 | } 31 | } 32 | } 33 | 34 | impl CreateQuery for ExactTerm { 35 | fn create_query(self, schema: &Schema) -> Result> { 36 | 
let KeyValue { field, value, .. } = self.term; 37 | let term = make_field_value(schema, &field, &value)?; 38 | Ok(Box::new(TermQuery::new(term, IndexRecordOption::Basic))) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /schema.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "lyrics", 4 | "type": "text", 5 | "options": { 6 | "indexing": { 7 | "record": "position", 8 | "tokenizer": "default" 9 | }, 10 | "stored": true 11 | } 12 | }, 13 | { 14 | "name": "year", 15 | "type": "i64", 16 | "options": { 17 | "indexed": true, 18 | "stored": true 19 | } 20 | }, 21 | { 22 | "name": "idx", 23 | "type": "u64", 24 | "options": { 25 | "indexed": true, 26 | "stored": true 27 | } 28 | }, 29 | { 30 | "name": "artist", 31 | "type": "text", 32 | "options": { 33 | "indexing": { 34 | "record": "position", 35 | "tokenizer": "default" 36 | }, 37 | "stored": true 38 | } 39 | }, 40 | { 41 | "name": "genre", 42 | "type": "text", 43 | "options": { 44 | "indexing": { 45 | "record": "position", 46 | "tokenizer": "default" 47 | }, 48 | "stored": true 49 | } 50 | }, 51 | { 52 | "name": "song", 53 | "type": "text", 54 | "options": { 55 | "indexing": { 56 | "record": "position", 57 | "tokenizer": "default" 58 | }, 59 | "stored": true 60 | } 61 | } 62 | ] -------------------------------------------------------------------------------- /toshi-server/src/shutdown.rs: -------------------------------------------------------------------------------- 1 | use futures::{Future, FutureExt}; 2 | use log::*; 3 | use tokio::sync::oneshot; 4 | 5 | #[cfg(unix)] 6 | pub fn shutdown(s: oneshot::Sender<()>) -> impl Future> + Unpin + Send { 7 | use futures::future; 8 | use tokio::signal::unix::{signal, SignalKind}; 9 | 10 | let sigint = async { 11 | signal(SignalKind::interrupt()).unwrap().recv().await; 12 | String::from("sigint") 13 | }; 14 | let sigterm = async { 15 | 
signal(SignalKind::terminate()).unwrap().recv().await; 16 | String::from("sigterm") 17 | }; 18 | let sig = future::select(Box::pin(sigint), Box::pin(sigterm)).map(|_| String::from("Signal")); 19 | Box::pin(handle_shutdown(s, Box::pin(sig))) 20 | } 21 | 22 | #[cfg(not(unix))] 23 | pub fn shutdown(signal: oneshot::Sender<()>) -> impl Future> + Unpin + Send { 24 | let stream = tokio::signal::ctrl_c().map(|_| String::from("ctrl-c")); 25 | Box::pin(handle_shutdown(signal, Box::pin(stream))) 26 | } 27 | 28 | #[cfg_attr(tarpaulin, skip)] 29 | pub async fn handle_shutdown(signal: oneshot::Sender<()>, stream: S) -> Result<(), ()> 30 | where 31 | S: Future + Unpin, 32 | { 33 | let s = stream.await; 34 | info!("Received signal: {}", s); 35 | info!("Gracefully shutting down..."); 36 | signal.send(()) 37 | } 38 | -------------------------------------------------------------------------------- /toshi-client/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "toshi" 3 | version = "0.1.1" 4 | authors = ["Stephen Carman "] 5 | description = "A client for a full text search engine based on Tantivy" 6 | repository = "https://github.com/toshi-search/Toshi" 7 | license = "MIT" 8 | edition = "2021" 9 | autoexamples = false 10 | 11 | [[example]] 12 | name = "sync_search" 13 | required-features = ["isahc_client"] 14 | 15 | [[example]] 16 | name = "bool_query" 17 | 18 | [[example]] 19 | name = "exact_query" 20 | 21 | [[example]] 22 | name = "range_query" 23 | 24 | [[example]] 25 | name = "hyper_example" 26 | required-features = ["hyper_client"] 27 | 28 | [features] 29 | default = ["isahc_client"] 30 | isahc_client = ["isahc"] 31 | hyper_client = ["hyper", "bytes"] 32 | tls = ["hyper_client", "hyper-tls"] 33 | rust_tls = ["hyper_client", "hyper-rustls"] 34 | 35 | [dependencies] 36 | toshi-types = { path = "../toshi-types" } 37 | async-trait = "^0.1" 38 | http = "^0.2" 39 | thiserror = "^1.0" 40 | serde = "^1.0" 41 | 
serde_json = "^1.0" 42 | tantivy = "^0.19" 43 | tokio = { version = "^1.13", features = ["full"] } 44 | bytes = { version = "^1", optional = true } 45 | 46 | [dependencies.isahc] 47 | version = "^1" 48 | features = ["json"] 49 | optional = true 50 | 51 | [dependencies.hyper] 52 | version = "^0.14" 53 | features = ["full"] 54 | optional = true 55 | 56 | [dependencies.hyper-tls] 57 | version = "^0.5" 58 | optional = true 59 | 60 | [dependencies.hyper-rustls] 61 | version = "^0.23" 62 | optional = true 63 | 64 | [dev-dependencies] 65 | tokio = "^1" 66 | -------------------------------------------------------------------------------- /toshi-server/src/handlers/summary.rs: -------------------------------------------------------------------------------- 1 | use hyper::{Response, StatusCode}; 2 | use log::{debug, info}; 3 | use serde::Serialize; 4 | use std::time::Instant; 5 | 6 | use toshi_types::*; 7 | 8 | use crate::handlers::ResponseFuture; 9 | use crate::utils::{empty_with_code, with_body}; 10 | use std::sync::Arc; 11 | 12 | #[derive(Serialize)] 13 | struct FlushResponse { 14 | opstamp: u64, 15 | } 16 | 17 | pub async fn index_summary(catalog: Arc, index: &str, options: QueryOptions) -> ResponseFuture { 18 | let start = Instant::now(); 19 | if let Ok(index) = catalog.get_index(index) { 20 | let metas = index.get_index().load_metas().unwrap(); 21 | let summary = if options.include_sizes() { 22 | SummaryResponse::new(metas, Some(index.get_space())) 23 | } else { 24 | SummaryResponse::new(metas, None) 25 | }; 26 | info!("Took: {:?}", start.elapsed()); 27 | Ok(with_body(summary)) 28 | } else { 29 | let resp = Response::from(Error::UnknownIndex(index.into())); 30 | info!("Took: {:?}", start.elapsed()); 31 | Ok(resp) 32 | } 33 | } 34 | 35 | pub async fn flush(catalog: Arc, index: &str) -> ResponseFuture { 36 | if let Ok(local_index) = catalog.get_index(index) { 37 | let writer = local_index.get_writer(); 38 | let mut write = writer.lock().await; 39 | let opstamp = 
write.commit().unwrap(); 40 | info!("Successful commit: {}", index); 41 | Ok(with_body(FlushResponse { opstamp })) 42 | } else { 43 | debug!("Could not find index: {}", index); 44 | Ok(empty_with_code(StatusCode::NOT_FOUND)) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /toshi-server/src/utils.rs: -------------------------------------------------------------------------------- 1 | use http::{Response, StatusCode}; 2 | use hyper::Body; 3 | use serde::Serialize; 4 | use toshi_types::{Error, ErrorResponse}; 5 | 6 | pub fn with_body(body: T) -> Response 7 | where 8 | T: Serialize, 9 | { 10 | let json = serde_json::to_vec::(&body).unwrap(); 11 | 12 | Response::builder() 13 | .header(hyper::header::CONTENT_TYPE, "application/json") 14 | .body(Body::from(json)) 15 | .unwrap() 16 | } 17 | 18 | pub fn error_response(code: StatusCode, e: Error) -> Response { 19 | let mut resp = with_body(ErrorResponse { message: e.to_string() }); 20 | *resp.status_mut() = code; 21 | resp 22 | } 23 | 24 | pub fn empty_with_code(code: StatusCode) -> Response { 25 | Response::builder().status(code).body(Body::empty()).unwrap() 26 | } 27 | 28 | pub async fn not_found() -> Result, hyper::Error> { 29 | Ok(empty_with_code(StatusCode::NOT_FOUND)) 30 | } 31 | 32 | pub fn parse_path(path: &str) -> Vec<&str> { 33 | path.trim_matches('/').split('/').filter(|s| !s.is_empty()).collect() 34 | } 35 | 36 | #[cfg(test)] 37 | mod tests { 38 | use super::*; 39 | use pretty_assertions::assert_eq; 40 | 41 | #[test] 42 | fn test_parse_path() { 43 | let root = "/"; 44 | let one = "/path"; 45 | let two = "/path/two"; 46 | 47 | let parsed_root = parse_path(root); 48 | let parsed_one = parse_path(one); 49 | let parsed_two = parse_path(two); 50 | assert_eq!(parsed_root.len(), 0); 51 | assert_eq!(parsed_one.len(), 1); 52 | assert_eq!(parsed_one[0], "path"); 53 | assert_eq!(parsed_two.len(), 2); 54 | assert_eq!(parsed_two[0], "path"); 55 | assert_eq!(parsed_two[1], 
"two"); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | ignore: 9 | - dependency-name: message-io 10 | versions: 11 | - 0.10.0 12 | - 0.10.1 13 | - 0.10.2 14 | - 0.11.0 15 | - 0.11.1 16 | - 0.12.0 17 | - 0.12.1 18 | - 0.12.2 19 | - 0.13.0 20 | - 0.13.1 21 | - 0.13.2 22 | - 0.9.2 23 | - 0.9.3 24 | - 0.9.4 25 | - dependency-name: async-trait 26 | versions: 27 | - 0.1.42 28 | - 0.1.43 29 | - 0.1.48 30 | - 0.1.49 31 | - dependency-name: tonic 32 | versions: 33 | - 0.4.1 34 | - 0.4.2 35 | - dependency-name: tokio 36 | versions: 37 | - 1.3.0 38 | - 1.4.0 39 | - 1.33.0 40 | - dependency-name: tonic-build 41 | versions: 42 | - 0.4.1 43 | - dependency-name: pretty_assertions 44 | versions: 45 | - 0.7.0 46 | - 0.7.1 47 | - dependency-name: serde 48 | versions: 49 | - 1.0.124 50 | - dependency-name: serde_json 51 | versions: 52 | - 1.0.61 53 | - 1.0.62 54 | - 1.0.63 55 | - dependency-name: remove_dir_all 56 | versions: 57 | - 0.6.1 58 | - dependency-name: futures 59 | versions: 60 | - 0.3.12 61 | - 0.3.13 62 | - 0.3.28 63 | - dependency-name: anyhow 64 | versions: 65 | - 1.0.38 66 | - dependency-name: thiserror 67 | versions: 68 | - 1.0.23 69 | - 1.0.24 70 | - dependency-name: toml 71 | versions: 72 | - 0.5.8 73 | - dependency-name: structopt 74 | versions: 75 | - 0.3.21 76 | - dependency-name: isahc 77 | versions: 78 | - 0.9.12 79 | - dependency-name: async-channel 80 | versions: 81 | - 1.5.1 82 | - 1.6.0 83 | - 1.6.1 84 | - 1.9.0 85 | -------------------------------------------------------------------------------- /toshi-raft/src/rpc_utils.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | use std::path::PathBuf; 3 | 4 
| use http::Uri; 5 | use slog::Logger; 6 | use tantivy::directory::MmapDirectory; 7 | use tantivy::schema::Schema; 8 | use tantivy::Index; 9 | use tonic::{transport, Code, Response, Status}; 10 | 11 | use toshi_proto::cluster_rpc::*; 12 | use toshi_types::{Error, Search}; 13 | 14 | pub fn create_from_managed(mut base_path: PathBuf, index_path: &str, schema: Schema) -> Result { 15 | base_path.push(index_path); 16 | if !base_path.exists() { 17 | fs::create_dir(&base_path)?; 18 | } 19 | let dir: MmapDirectory = MmapDirectory::open(base_path)?; 20 | Index::open_or_create(dir, schema).map_err(Into::into) 21 | } 22 | 23 | pub async fn create_client(uri: &Uri, logger: Option) -> Result, transport::Error> { 24 | if let Some(log) = logger { 25 | slog::info!(log, "Creating Client to: {:?}", uri); 26 | } 27 | client::IndexServiceClient::connect(uri.clone()).await.map_err(Into::into) 28 | } 29 | 30 | pub fn ok_result() -> ResultReply { 31 | create_result(0, "".into()) 32 | } 33 | 34 | pub fn create_result(code: i32, message: String) -> ResultReply { 35 | ResultReply { code, message } 36 | } 37 | 38 | pub fn create_search_reply(result: Option, doc: Vec) -> SearchReply { 39 | SearchReply { result, doc } 40 | } 41 | 42 | pub fn error_response(code: Code, msg: String) -> Result, Status> { 43 | let status = Status::new(code, msg); 44 | Err(status) 45 | } 46 | 47 | pub fn query_or_all(b: &[u8]) -> Result { 48 | let deser: Search = serde_json::from_slice(b)?; 49 | if deser.query.is_none() { 50 | return Ok(Search::all_docs()); 51 | } 52 | Ok(deser) 53 | } 54 | -------------------------------------------------------------------------------- /toshi-server/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "toshi-server" 3 | version = "0.1.1" 4 | authors = ["Stephen Carman "] 5 | description = "A full text search engine based on Tantivy" 6 | repository = "https://github.com/toshi-search/Toshi" 7 | license = "MIT" 8 | 
edition = "2021" 9 | resolver = "2" 10 | 11 | [[bin]] 12 | name = "toshi" 13 | 14 | [lib] 15 | path = "src/lib.rs" 16 | 17 | [features] 18 | extra_tokenizers = ["cang-jie"] 19 | 20 | [dependencies] 21 | toshi-types = { path = "../toshi-types" } 22 | http = "^0.2" 23 | bytes = "^1" 24 | hyper = { version = "^0.14", features = ["full"] } 25 | serde_json = "^1.0" 26 | serde_urlencoded = "^0.7" 27 | futures = "^0.3" 28 | tower-util = "^0.3" 29 | tantivy = "^0.19" 30 | tokio = { version = "^1", features = ["full"] } 31 | async-trait = "^0.1" 32 | config = "^0.13" 33 | structopt = "^0.3" 34 | thiserror = "^1.0" 35 | anyhow = "^1.0" 36 | dashmap = "^5" 37 | serde = { version = "^1.0", features = ["derive"] } 38 | clap = { version = "^4", features = ["color"] } 39 | slog = "^2.7" 40 | slog-stdlog = "^4.0" 41 | slog-scope = "^4.3" 42 | log = { version = "*", features = ["max_level_trace", "release_max_level_warn"] } 43 | sloggers = "^2" 44 | toml = "^0.5" 45 | flume = { version = "^0.10", features = ["async"] } 46 | itertools = "^0.10" 47 | tokio-stream = "^0.1" 48 | tokio-util = { version = "^0.7", features = ["full"] } 49 | cang-jie = { version = "^0.14", optional = true } 50 | 51 | 52 | [dev-dependencies] 53 | remove_dir_all = "^0.7" 54 | pretty_assertions = "^1" 55 | tokio-test = "^0.4" 56 | toshi = { path = "../toshi-client", default-features = false, features = ["hyper_client"] } 57 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: toshi-push 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | os: 11 | - macOS-latest 12 | - ubuntu-latest 13 | - windows-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Install Rust & Tools 17 | uses: actions-rs/toolchain@v1 18 | with: 19 | toolchain: stable 20 | override: true 21 | components: clippy, rustfmt 22 | - 
name: Check Source 23 | uses: actions-rs/cargo@v1 24 | with: 25 | command: check 26 | args: --all 27 | - name: Check Fmt 28 | uses: actions-rs/cargo@v1 29 | if: matrix.os != 'windows-latest' 30 | with: 31 | command: fmt 32 | args: --all -- --check 33 | - name: Run Clippy 34 | uses: actions-rs/cargo@v1 35 | with: 36 | command: clippy 37 | - name: Run Tests 38 | uses: actions-rs/cargo@v1 39 | with: 40 | command: test 41 | args: --package 'toshi' --package 'toshi-server' --package 'toshi-types' 42 | - uses: actions-rs/toolchain@v1 43 | if: matrix.os == 'ubuntu-latest' 44 | with: 45 | toolchain: nightly 46 | override: true 47 | - uses: actions-rs/cargo@v1 48 | if: matrix.os == 'ubuntu-latest' 49 | with: 50 | command: test 51 | args: --no-fail-fast --package 'toshi' --package 'toshi-server' --package 'toshi-types' 52 | env: 53 | CARGO_INCREMENTAL: '0' 54 | RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' 55 | RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' 56 | - uses: actions-rs/grcov@v0.1 57 | if: matrix.os == 'ubuntu-latest' 58 | - uses: codecov/codecov-action@v1 59 | if: matrix.os == 'ubuntu-latest' 60 | -------------------------------------------------------------------------------- /toshi-proto/proto/cluster.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package clusterrpc; 4 | 5 | import "eraftpb.proto"; 6 | 7 | service IndexService { 8 | rpc ping (PingRequest) returns (PingReply); 9 | rpc place_index (PlaceRequest) returns (ResultReply); 10 | rpc list_indexes (ListRequest) returns (ListReply); 11 | rpc place_document (DocumentRequest) returns (ResultReply); 12 | rpc delete_document (DeleteRequest) returns (DeleteReply); 13 | rpc search_index (SearchRequest) returns (SearchReply); 14 | rpc get_summary (SummaryRequest) 
returns (SummaryReply); 15 | rpc raft_request (RaftRequest) returns (RaftReply); 16 | rpc join (JoinRequest) returns (ResultReply); 17 | } 18 | 19 | enum ResultCode { 20 | SUCCESS = 0; 21 | FAILURE = 1; 22 | NO_RESULTS = 2; 23 | UNKNOWNNNNN = 3; 24 | MAGIC_UNICORN = 4; 25 | } 26 | 27 | message RaftRequest { 28 | eraftpb.Message message = 1; 29 | } 30 | 31 | message JoinRequest { 32 | uint64 id = 1; 33 | string host = 2; 34 | } 35 | 36 | message RaftReply { 37 | ResultCode code = 1; 38 | } 39 | 40 | message ResultReply { 41 | ResultCode code = 1; 42 | string message = 2; 43 | } 44 | 45 | message ListRequest { 46 | } 47 | 48 | message ListReply { 49 | repeated string indexes = 1; 50 | } 51 | 52 | message PingReply { 53 | string status = 1; 54 | } 55 | 56 | message PingRequest { 57 | } 58 | 59 | message SearchRequest { 60 | string index = 1; 61 | bytes query = 2; 62 | } 63 | 64 | message SearchReply { 65 | ResultReply result = 1; 66 | bytes doc = 2; 67 | } 68 | 69 | message PlaceRequest { 70 | string index = 1; 71 | bytes schema = 2; 72 | } 73 | 74 | message DocumentRequest { 75 | string index = 1; 76 | bytes document = 2; 77 | } 78 | 79 | message DeleteRequest { 80 | string index = 1; 81 | bytes terms = 2; 82 | } 83 | 84 | message ReplicaRequest { 85 | string index = 1; 86 | string from = 2; 87 | string to = 3; 88 | } 89 | 90 | message SummaryRequest { 91 | string index = 1; 92 | } 93 | 94 | message SummaryReply { 95 | bytes summary = 1; 96 | } 97 | 98 | message DeleteReply { 99 | string index = 1; 100 | uint64 docs_affected = 2; 101 | } 102 | -------------------------------------------------------------------------------- /toshi-raft/src/raft_io.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::net::SocketAddr; 3 | 4 | use dashmap::DashMap; 5 | use message_io::events::*; 6 | use message_io::network::{split as net_split, Endpoint, NetEvent, NetworkController, NetworkProcessor, Transport}; 7 | 
use serde::{Deserialize, Serialize}; 8 | 9 | #[derive(Serialize, Deserialize, Debug, Clone)] 10 | pub enum RaftEvents { 11 | JoinCluster(u32), 12 | } 13 | 14 | pub struct RaftIO { 15 | sender: EventSender, 16 | receiver: EventReceiver, 17 | controller: NetworkController, 18 | processor: NetworkProcessor, 19 | peers: DashMap, 20 | } 21 | 22 | impl Default for RaftIO { 23 | fn default() -> Self { 24 | let (sender, receiver) = split(); 25 | let (controller, processor) = net_split(); 26 | Self { 27 | sender, 28 | receiver, 29 | peers: DashMap::default(), 30 | controller, 31 | processor, 32 | } 33 | } 34 | } 35 | 36 | impl RaftIO { 37 | pub fn new(id: u32, ep: Endpoint) -> Self { 38 | let (sender, receiver) = split(); 39 | let (controller, processor) = net_split(); 40 | 41 | let peers = DashMap::::new(); 42 | peers.insert(id, ep); 43 | Self { 44 | sender, 45 | receiver, 46 | controller, 47 | processor, 48 | peers, 49 | } 50 | } 51 | 52 | fn join_cluster(&mut self, id: u32, addr: SocketAddr) { 53 | if let Ok((endpoint, _)) = self.controller.connect(Transport::Tcp, addr) { 54 | self.peers.insert(id, endpoint); 55 | } 56 | } 57 | 58 | pub fn run(self, endpoint: String) -> Result<(), Box> { 59 | let (_, _) = self.controller.listen(Transport::Tcp, &endpoint)?; 60 | 61 | Ok(()) 62 | } 63 | fn process_event(&mut self, msg: NetEvent) { 64 | match msg { 65 | NetEvent::Message(ep, payload) => { 66 | let msg = bincode::deserialize::(payload).unwrap(); 67 | match msg { 68 | RaftEvents::JoinCluster(id) => self.join_cluster(id, ep.addr()), 69 | } 70 | } 71 | NetEvent::Connected(_, _) => {} 72 | NetEvent::Disconnected(_) => {} 73 | NetEvent::Accepted(_, _) => {} 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /toshi-server/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![forbid(unsafe_code)] 2 | #![deny(future_incompatible)] 3 | #![allow(clippy::cognitive_complexity)] 4 | 5 | 
use std::sync::Arc; 6 | 7 | use slog::Logger; 8 | 9 | use toshi_types::FlatNamedDocument; 10 | 11 | use crate::index::IndexCatalog; 12 | use crate::settings::Settings; 13 | 14 | pub mod commit; 15 | pub mod handle; 16 | pub mod handlers; 17 | pub mod index; 18 | // pub mod local_serve; 19 | pub mod router; 20 | pub mod settings; 21 | pub mod shutdown; 22 | pub mod utils; 23 | 24 | pub type Result = std::result::Result; 25 | pub type AddDocument = toshi_types::AddDocument; 26 | pub type SearchResults = toshi_types::SearchResults; 27 | pub type SharedCatalog = Arc; 28 | 29 | pub fn setup_catalog(settings: &Settings) -> SharedCatalog { 30 | let index_catalog = IndexCatalog::new(settings.clone()).unwrap(); 31 | Arc::new(index_catalog) 32 | } 33 | 34 | #[cfg(not(debug_assertions))] 35 | pub fn setup_logging_from_file(path: &str) -> Result { 36 | use sloggers::{Config, LoggerConfig}; 37 | let file = std::fs::read(path)?; 38 | toml::from_slice(&file) 39 | .map(|cfg: LoggerConfig| cfg.build_logger().expect("Bad Config Format")) 40 | .map_err(toshi_types::Error::TomlError) 41 | } 42 | 43 | #[cfg(debug_assertions)] 44 | pub fn setup_logging_from_file(_: &str) -> Result { 45 | use sloggers::types::*; 46 | use sloggers::Build; 47 | let log = sloggers::terminal::TerminalLoggerBuilder::new() 48 | .format(Format::Full) 49 | .level(Severity::Info) 50 | .timezone(TimeZone::Local) 51 | .build() 52 | .map_err(anyhow::Error::from)?; 53 | 54 | Ok(log) 55 | } 56 | 57 | #[cfg(feature = "extra_tokenizers")] 58 | pub fn register_tokenizers(idx: tantivy::Index) -> tantivy::Index { 59 | let schema = idx.schema(); 60 | let has_tokenizer = schema.fields().find(|(_, entry)| match entry.field_type() { 61 | tantivy::schema::FieldType::Str(ref opts) => opts 62 | .get_indexing_options() 63 | .map(|to| to.tokenizer() == cang_jie::CANG_JIE) 64 | .unwrap_or(false), 65 | _ => false, 66 | }); 67 | if has_tokenizer.is_some() { 68 | let tokenizer = cang_jie::CangJieTokenizer::default(); 69 | 
idx.tokenizers().register(cang_jie::CANG_JIE, tokenizer) 70 | } 71 | idx 72 | } 73 | 74 | #[cfg(not(feature = "extra_tokenizers"))] 75 | pub fn register_tokenizers(idx: tantivy::Index) -> tantivy::Index { 76 | idx 77 | } 78 | -------------------------------------------------------------------------------- /toshi-types/src/query/regex.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use tantivy::query::{Query, RegexQuery as TantivyRegexQuery}; 3 | use tantivy::schema::Schema; 4 | 5 | use crate::query::{CreateQuery, KeyValue}; 6 | use crate::{error::Error, Result}; 7 | 8 | /// A search query based around a regular expression 9 | #[derive(Serialize, Deserialize, Debug, Clone)] 10 | pub struct RegexQuery { 11 | regex: KeyValue, 12 | } 13 | 14 | impl RegexQuery { 15 | /// Constructor for a query from a known key value 16 | pub fn new(regex: KeyValue) -> Self { 17 | Self { regex } 18 | } 19 | /// Constructor to create a key value for the user 20 | pub fn from_str(field: String, regex: R) -> Self 21 | where 22 | R: ToString, 23 | { 24 | Self::new(KeyValue::new(field, regex.to_string())) 25 | } 26 | } 27 | 28 | impl CreateQuery for RegexQuery { 29 | fn create_query(self, schema: &Schema) -> Result> { 30 | let KeyValue { field, value, .. 
} = self.regex; 31 | let field = schema 32 | .get_field(&field) 33 | .ok_or_else(|| Error::QueryError(format!("Field: {} does not exist", field)))?; 34 | Ok(Box::new(TantivyRegexQuery::from_pattern(&value, field)?)) 35 | } 36 | } 37 | 38 | #[cfg(test)] 39 | mod tests { 40 | use tantivy::schema::*; 41 | 42 | use super::*; 43 | 44 | #[test] 45 | fn test_valid_regex() { 46 | let body = r#"{ "regex": { "test_text": ".*" } }"#; 47 | let mut schema = SchemaBuilder::new(); 48 | schema.add_u64_field("test_text", FAST); 49 | let phrase: RegexQuery = serde_json::from_str(body).unwrap(); 50 | let query = phrase.create_query(&schema.build()); 51 | assert!(query.is_ok()); 52 | } 53 | 54 | #[test] 55 | fn test_bad_regex() { 56 | let body = r#"{ "regex": { "test_text": "[(.!" } }"#; 57 | let mut schema = SchemaBuilder::new(); 58 | schema.add_u64_field("test_text", FAST); 59 | let phrase: RegexQuery = serde_json::from_str(body).unwrap(); 60 | let query = phrase.create_query(&schema.build()); 61 | assert!(query.is_err()); 62 | } 63 | 64 | #[test] 65 | fn test_create_regex() { 66 | let mut schema = SchemaBuilder::new(); 67 | schema.add_u64_field("test_text", FAST); 68 | let phrase: RegexQuery = RegexQuery::from_str("test_text".into(), ".*"); 69 | let query = phrase.create_query(&schema.build()); 70 | 71 | assert!(query.is_ok()); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /docs/api.raml: -------------------------------------------------------------------------------- 1 | #%RAML 1.0 2 | title: Toshi Search 3 | version: 0.1.1 4 | baseUri: localhost:8080 5 | mediaType: application/json 6 | protocols: [HTTP, HTTPS] 7 | types: 8 | Index: 9 | type: object 10 | properties: 11 | name: string 12 | version: string 13 | example: 14 | name: "Toshi Search" 15 | version: "0.1.1" 16 | Options: 17 | type: object 18 | properties: 19 | commit?: boolean 20 | example: 21 | commit: true 22 | Document: 23 | type: object 24 | description: A Tantivy 
Document in a Key, Value format 25 | AddDocument: 26 | type: object 27 | properties: 28 | options?: Options 29 | document: Document 30 | example: 31 | options: 32 | commit: true 33 | document: 34 | key: value 35 | /: 36 | displayName: Get Version 37 | description: Returns the current version of Toshi running. 38 | get: 39 | protocols: [HTTP, HTTPS] 40 | responses: 41 | 200: 42 | body: 43 | application/json: 44 | type: Index 45 | /{index}: 46 | displayName: Index Operations 47 | get: 48 | protocols: [HTTP, HTTPS] 49 | displayName: Get All Docs for an Index 50 | responses: 51 | 200: 52 | body: 53 | post: 54 | protocols: [HTTP, HTTPS] 55 | displayName: Return Docs Matching a Query 56 | body: 57 | application/json: 58 | properties: 59 | query: 60 | type: object 61 | responses: 62 | 200: 63 | put: 64 | protocols: [HTTP, HTTPS] 65 | displayName: Add A Document 66 | description: Provide a document that document will be added to the defined Index 67 | body: 68 | application/json: 69 | type: AddDocument 70 | responses: 71 | 201: 72 | delete: 73 | protocols: [HTTP, HTTPS] 74 | displayName: Delete Docs Containing Terms 75 | responses: 76 | 200: 77 | /_create: 78 | displayName: Creates an index. 
79 | put: 80 | protocols: [HTTP, HTTPS] 81 | responses: 82 | 201: 83 | /_summary: 84 | displayName: Index Summary 85 | get: 86 | protocols: [HTTP, HTTPS] 87 | responses: 88 | 200: 89 | /_bulk: 90 | displayName: Bulk Ingest 91 | post: 92 | protocols: [HTTP, HTTPS] 93 | responses: 94 | 200: 95 | /_flush: 96 | displayName: Force a commit to an index 97 | get: 98 | protocols: [HTTP, HTTPS] 99 | responses: 100 | 200: 101 | -------------------------------------------------------------------------------- /toshi-client/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | use http::Response; 4 | use serde::{de::DeserializeOwned, Serialize}; 5 | use tantivy::schema::Schema; 6 | 7 | use async_trait::async_trait; 8 | 9 | pub use toshi_types::*; 10 | 11 | pub use crate::error::ToshiClientError; 12 | 13 | pub mod error; 14 | 15 | #[cfg(feature = "isahc_client")] 16 | pub use isahc_client::ToshiClient; 17 | #[cfg(feature = "isahc_client")] 18 | mod isahc_client; 19 | 20 | #[cfg(feature = "hyper_client")] 21 | pub use hyper_client::HyperToshi; 22 | #[cfg(feature = "hyper_client")] 23 | mod hyper_client; 24 | 25 | pub type Result = std::result::Result; 26 | 27 | #[async_trait] 28 | pub trait AsyncClient { 29 | type Body; 30 | 31 | async fn index(&self) -> Result>; 32 | 33 | async fn list(&self) -> Result>; 34 | 35 | async fn index_summary(&self, index: I, include_sizes: bool) -> Result> 36 | where 37 | I: ToString + Send + Sync + Display; 38 | 39 | async fn create_index(&self, name: I, schema: Schema) -> Result> 40 | where 41 | I: ToString + Send + Sync + Display; 42 | 43 | async fn add_document(&self, index: I, document: D, options: Option) -> Result> 44 | where 45 | I: ToString + Send + Sync + Display, 46 | D: Serialize + Send + Sync; 47 | 48 | async fn search(&self, index: I, search: Search) -> Result> 49 | where 50 | I: ToString + Send + Sync + Display, 51 | D: DeserializeOwned + Clone + Send + Sync + 
Unpin; 52 | 53 | async fn all_docs(&self, index: I) -> Result> 54 | where 55 | I: ToString + Send + Sync + Display, 56 | D: DeserializeOwned + Clone + Send + Sync + Unpin; 57 | } 58 | 59 | pub trait SyncClient { 60 | type Body; 61 | 62 | fn sync_index(&self) -> Result>; 63 | 64 | fn sync_index_summary(&self, index: I, include_sizes: bool) -> Result> 65 | where 66 | I: ToString + Display; 67 | 68 | fn sync_create_index(&self, name: I, schema: Schema) -> Result> 69 | where 70 | I: ToString + Display; 71 | 72 | fn sync_add_document(&self, index: I, document: D, options: Option) -> Result> 73 | where 74 | I: ToString + Display, 75 | D: Serialize; 76 | 77 | fn sync_search(&self, index: I, search: Search) -> Result> 78 | where 79 | I: ToString + Display, 80 | D: DeserializeOwned + Clone; 81 | 82 | fn sync_all_docs(&self, index: I) -> Result> 83 | where 84 | I: ToString + Display, 85 | D: DeserializeOwned + Clone; 86 | } 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | logs/ 4 | new_index/ 5 | .node_id.txt 6 | data/* 7 | 8 | # Created by https://www.gitignore.io/api/rust,intellij 9 | # Edit at https://www.gitignore.io/?templates=rust,intellij 10 | 11 | ### Intellij ### 12 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 13 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 14 | 15 | # User-specific stuff 16 | .idea/**/workspace.xml 17 | .idea/**/tasks.xml 18 | .idea/**/usage.statistics.xml 19 | .idea/**/dictionaries 20 | .idea/**/shelf 21 | 22 | # Generated files 23 | .idea/**/contentModel.xml 24 | 25 | # Sensitive or high-churn files 26 | .idea/**/dataSources/ 27 | .idea/**/dataSources.ids 28 | .idea/**/dataSources.local.xml 29 | .idea/**/sqlDataSources.xml 30 | .idea/**/dynamic.xml 31 | .idea/**/uiDesigner.xml 32 | 
.idea/**/dbnavigator.xml 33 | 34 | # Gradle 35 | .idea/**/gradle.xml 36 | .idea/**/libraries 37 | 38 | # Gradle and Maven with auto-import 39 | # When using Gradle or Maven with auto-import, you should exclude module files, 40 | # since they will be recreated, and may cause churn. Uncomment if using 41 | # auto-import. 42 | # .idea/modules.xml 43 | # .idea/*.iml 44 | # .idea/modules 45 | # *.iml 46 | # *.ipr 47 | 48 | # CMake 49 | cmake-build-*/ 50 | 51 | # Mongo Explorer plugin 52 | .idea/**/mongoSettings.xml 53 | 54 | # File-based project format 55 | *.iws 56 | 57 | # IntelliJ 58 | out/ 59 | 60 | # mpeltonen/sbt-idea plugin 61 | .idea_modules/ 62 | 63 | # JIRA plugin 64 | atlassian-ide-plugin.xml 65 | 66 | # Cursive Clojure plugin 67 | .idea/replstate.xml 68 | 69 | # Crashlytics plugin (for Android Studio and IntelliJ) 70 | com_crashlytics_export_strings.xml 71 | crashlytics.properties 72 | crashlytics-build.properties 73 | fabric.properties 74 | 75 | # Editor-based Rest Client 76 | .idea/httpRequests 77 | 78 | # Android studio 3.1+ serialized cache file 79 | .idea/caches/build_file_checksums.ser 80 | 81 | ### Intellij Patch ### 82 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 83 | 84 | # *.iml 85 | # modules.xml 86 | # .idea/misc.xml 87 | # *.ipr 88 | 89 | # Sonarlint plugin 90 | .idea/sonarlint 91 | 92 | ### Rust ### 93 | # Generated by Cargo 94 | # will have compiled files and executables 95 | /target/ 96 | 97 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 98 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 99 | Cargo.lock 100 | 101 | # These are backup files generated by rustfmt 102 | **/*.rs.bk 103 | 104 | # End of https://www.gitignore.io/api/rust,intellij 105 | -------------------------------------------------------------------------------- /requests.http: 
-------------------------------------------------------------------------------- 1 | ### 2 | ### Assuming Toshi is running on port 8080 3 | ### Create an index for some song lyrics 4 | GET http://{{host}}:{{port}} 5 | 6 | ### 7 | PUT http://{{host}}:{{port}}/lyrics/_create 8 | Content-Type: application/json 9 | 10 | [ 11 | { 12 | "name": "lyrics", 13 | "type": "text", 14 | "options": { 15 | "indexing": { 16 | "record": "position", 17 | "tokenizer": "default" 18 | }, 19 | "stored": true 20 | } 21 | }, 22 | { 23 | "name": "year", 24 | "type": "i64", 25 | "options": { 26 | "indexed": true, 27 | "stored": true 28 | } 29 | }, 30 | { 31 | "name": "idx", 32 | "type": "u64", 33 | "options": { 34 | "indexed": true, 35 | "stored": true 36 | } 37 | }, 38 | { 39 | "name": "artist", 40 | "type": "text", 41 | "options": { 42 | "indexing": { 43 | "record": "position", 44 | "tokenizer": "default" 45 | }, 46 | "stored": true 47 | } 48 | }, 49 | { 50 | "name": "genre", 51 | "type": "text", 52 | "options": { 53 | "indexing": { 54 | "record": "position", 55 | "tokenizer": "default" 56 | }, 57 | "stored": true 58 | } 59 | }, 60 | { 61 | "name": "song", 62 | "type": "text", 63 | "options": { 64 | "indexing": { 65 | "record": "position", 66 | "tokenizer": "default" 67 | }, 68 | "stored": true 69 | } 70 | } 71 | ] 72 | 73 | ### Get the schema summary back to see the index was created 74 | GET {{host}}:{{port}}/lyrics/_summary?include_sizes=true 75 | Content-Type: application/JSON 76 | 77 | ### Add a single song to the index 78 | PUT http://{{host}}:{{port}}/lyrics/ 79 | Content-Type: application/json 80 | 81 | { 82 | "options": { 83 | "commit": false 84 | }, 85 | "document": { 86 | "song": "he-still-loves-me-f-choir-from-fighting-temptations", 87 | "year": 2007, 88 | "artist": "beyonce-knowles", 89 | "genre": "Pop", 90 | "lyrics": "Took me a while but I'm finally here", 91 | "idx": 23 92 | } 93 | } 94 | 95 | ### List the indexes 96 | GET http://{{host}}:{{port}}/_list 97 | Content-Type: 
application/json 98 | 99 | ### Force a commit if necessary at any time 100 | GET http://{{host}}:{{port}}/lyrics/_flush 101 | Accept: application/json 102 | 103 | ### Get that document back from the engine 104 | GET http://{{host}}:{{port}}/lyrics/ 105 | Content-Type: application/json 106 | 107 | ### Test Term Query 108 | POST http://{{host}}:{{port}}/lyrics/ 109 | Content-Type: application/JSON 110 | 111 | { 112 | "query": { 113 | "term": { 114 | "lyrics": "me" 115 | } 116 | }, 117 | "limit": 1 118 | } 119 | -------------------------------------------------------------------------------- /toshi-types/src/query/fuzzy.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | use tantivy::query::{FuzzyTermQuery, Query as TantivyQuery}; 5 | use tantivy::schema::Schema; 6 | 7 | use crate::query::{make_field_value, CreateQuery, KeyValue, Query}; 8 | use crate::Result; 9 | 10 | /// A query where terms can have distance between them, but still be a match 11 | #[derive(Serialize, Deserialize, Debug, Clone)] 12 | pub struct FuzzyQuery { 13 | fuzzy: KeyValue, 14 | } 15 | 16 | impl FuzzyQuery { 17 | /// Constructor to create a fuzzy query from a known key value 18 | pub fn new(fuzzy: KeyValue) -> Self { 19 | Self { fuzzy } 20 | } 21 | /// Creates a builder for a fuzzy query 22 | pub fn builder() -> FuzzyQueryBuilder { 23 | FuzzyQueryBuilder::default() 24 | } 25 | } 26 | 27 | impl CreateQuery for FuzzyQuery { 28 | fn create_query(self, schema: &Schema) -> Result> { 29 | let KeyValue { field, value } = self.fuzzy; 30 | let term = make_field_value(schema, &field, &value.value)?; 31 | Ok(Box::new(FuzzyTermQuery::new(term, value.distance, value.transposition))) 32 | } 33 | } 34 | 35 | /// A term to be considered in the query 36 | #[derive(Serialize, Deserialize, Debug, Clone)] 37 | pub struct FuzzyTerm { 38 | value: String, 39 | #[serde(default)] 40 | distance: u8, 41 | #[serde(default)] 
42 | transposition: bool, 43 | } 44 | 45 | impl FuzzyTerm { 46 | /// Constructor to create a fuzzy query 47 | pub fn new(value: String, distance: u8, transposition: bool) -> Self { 48 | Self { 49 | value, 50 | distance, 51 | transposition, 52 | } 53 | } 54 | } 55 | 56 | #[derive(Debug, Default)] 57 | pub struct FuzzyQueryBuilder { 58 | field: String, 59 | value: String, 60 | distance: u8, 61 | transposition: bool, 62 | } 63 | 64 | impl FuzzyQueryBuilder { 65 | pub fn new() -> Self { 66 | FuzzyQueryBuilder::default() 67 | } 68 | 69 | pub fn with_value(mut self, value: V) -> Self 70 | where 71 | V: fmt::Display, 72 | { 73 | self.value = value.to_string(); 74 | self 75 | } 76 | 77 | pub fn for_field(mut self, field: V) -> Self 78 | where 79 | V: fmt::Display, 80 | { 81 | self.field = field.to_string(); 82 | self 83 | } 84 | 85 | pub fn with_distance(mut self, distance: u8) -> Self { 86 | self.distance = distance; 87 | self 88 | } 89 | 90 | pub fn with_transposition(mut self) -> Self { 91 | self.transposition = true; 92 | self 93 | } 94 | 95 | pub fn build(self) -> Query { 96 | let term = FuzzyTerm::new(self.value, self.distance, self.transposition); 97 | let query = FuzzyQuery::new(KeyValue::new(self.field, term)); 98 | Query::Fuzzy(query) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /toshi-types/src/server.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | use std::fmt::Formatter; 5 | use tantivy::schema::Schema; 6 | 7 | /// In a delete query, this is returned indicating the number of documents that were removed 8 | /// by the delete. 
9 | #[derive(Debug, Serialize, Deserialize, Clone)] 10 | pub struct DocsAffected { 11 | /// The number of documents removed by the query 12 | pub docs_affected: u64, 13 | } 14 | 15 | /// Indicates whether or not a commit should be done at the end of a document insert, the default 16 | /// is false 17 | #[derive(Debug, Serialize, Deserialize, Clone)] 18 | pub struct IndexOptions { 19 | /// Whether to commit after insertion 20 | #[serde(default)] 21 | pub commit: bool, 22 | } 23 | 24 | /// The request body for adding a single document to an index 25 | #[derive(Debug, Serialize, Deserialize, Clone)] 26 | pub struct AddDocument { 27 | /// Options surrounding the insert [`IndexOptions`] 28 | pub options: Option, 29 | /// The actual document to insert 30 | pub document: D, 31 | } 32 | 33 | impl AddDocument { 34 | /// Convenience method for Raft Implementation 35 | pub fn new(document: D, options: Option) -> Self { 36 | Self { options, document } 37 | } 38 | } 39 | 40 | /// A wrapper around Tantivy's schema for when an index is created. 
[`tantivy::schema::Schema`] 41 | #[derive(Serialize, Deserialize, Clone)] 42 | pub struct SchemaBody(pub Schema); 43 | 44 | impl std::fmt::Debug for SchemaBody { 45 | fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { 46 | f.write_str("Schema {\n")?; 47 | for field in self.0.fields() { 48 | f.write_fmt(format_args!("{:2}{:15}: {:?},\n", " ", field.1.name(), field.1.field_type()))?; 49 | } 50 | f.write_str("};")?; 51 | Ok(()) 52 | } 53 | } 54 | 55 | /// The request body for performing a delete request to an index 56 | #[derive(Debug, Serialize, Deserialize, Clone)] 57 | pub struct DeleteDoc { 58 | /// Options surrounding the delete [`IndexOptions`] 59 | pub options: Option, 60 | /// The term pairs to delete, since this could be any number of term pairs this does not use 61 | /// KeyValue like a lot of other queries do that only accept a single term pair at a time 62 | pub terms: HashMap, 63 | } 64 | 65 | #[cfg(test)] 66 | mod tests { 67 | 68 | use crate::SchemaBody; 69 | use tantivy::schema::*; 70 | 71 | #[test] 72 | fn test_debug() { 73 | let mut builder = SchemaBuilder::new(); 74 | builder.add_text_field("test_text", STORED | TEXT); 75 | builder.add_i64_field("test_i64", STORED | INDEXED | FAST); 76 | builder.add_u64_field("test_u64", STORED | INDEXED); 77 | builder.add_text_field("test_unindex", STORED); 78 | builder.add_facet_field("test_facet", FacetOptions::default()); 79 | builder.add_date_field("test_date", INDEXED | FAST); 80 | let schema = SchemaBody(builder.build()); 81 | 82 | println!("{:?}", schema); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /toshi-server/src/bin/toshi.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::fs::create_dir; 3 | use std::net::{IpAddr, SocketAddr}; 4 | use std::path::Path; 5 | use std::sync::{atomic::AtomicBool, Arc}; 6 | 7 | use futures::prelude::*; 8 | 9 | use log::info; 10 | 
11 | use tokio::sync::oneshot; 12 | 13 | use std::str::FromStr; 14 | use toshi_server::commit::watcher; 15 | use toshi_server::index::IndexCatalog; 16 | use toshi_server::router::Router; 17 | use toshi_server::settings::{settings, Settings, HEADER}; 18 | use toshi_server::{setup_logging_from_file, shutdown, SharedCatalog}; 19 | use toshi_types::Catalog; 20 | 21 | #[tokio::main] 22 | pub async fn main() -> Result<(), Box> { 23 | let settings = settings(); 24 | let logger = setup_logging_from_file("config/logging.toml")?; 25 | let _scope = slog_scope::set_global_logger(logger.clone()); 26 | let _guard = slog_stdlog::init_with_level(log::Level::from_str(&settings.log_level)?)?; 27 | 28 | let (tx, shutdown_signal) = oneshot::channel(); 29 | if !Path::new(&settings.path).exists() { 30 | info!("Base data path {} does not exist, creating it...", settings.path); 31 | create_dir(settings.path.clone()).expect("Unable to create data directory"); 32 | } 33 | 34 | let index_catalog = setup_catalog(&settings).await?; 35 | 36 | let s_clone = settings.clone(); 37 | let toshi = setup_toshi(s_clone.clone(), Arc::clone(&index_catalog), tx); 38 | tokio::spawn(toshi); 39 | info!("Toshi running on {}:{}", &settings.host, &settings.port); 40 | 41 | setup_shutdown(shutdown_signal, index_catalog).await.map_err(Into::into) 42 | } 43 | 44 | async fn setup_shutdown(shutdown_signal: oneshot::Receiver<()>, index_catalog: SharedCatalog) -> Result<(), oneshot::error::RecvError> { 45 | shutdown_signal.await?; 46 | info!("Shutting down..."); 47 | index_catalog.clear().await; 48 | Ok(()) 49 | } 50 | 51 | async fn setup_toshi(settings: Settings, index_catalog: SharedCatalog, tx: oneshot::Sender<()>) -> Result<(), ()> { 52 | let shutdown = shutdown::shutdown(tx); 53 | if settings.experimental { 54 | let master = run_master(Arc::clone(&index_catalog), settings.clone()); 55 | future::try_select(shutdown, master).map(|_| Ok(())).await 56 | } else { 57 | let master = run_master(Arc::clone(&index_catalog), 
settings); 58 | future::try_select(shutdown, master).map(|_| Ok(())).await 59 | } 60 | } 61 | 62 | async fn setup_catalog(settings: &Settings) -> Result { 63 | let mut index_catalog = match IndexCatalog::new(settings.clone()) { 64 | Ok(v) => v, 65 | Err(e) => { 66 | eprintln!("Error creating IndexCatalog from path {} - {:?}", settings.path, e); 67 | std::process::exit(1); 68 | } 69 | }; 70 | index_catalog.refresh_catalog().await?; 71 | info!("{} Indexes loaded...", index_catalog.get_collection().len()); 72 | Ok(Arc::new(index_catalog)) 73 | } 74 | 75 | fn run_master(catalog: SharedCatalog, settings: Settings) -> impl Future> + Unpin + Send { 76 | let bulk_lock = Arc::new(AtomicBool::new(false)); 77 | let commit_watcher = watcher(Arc::clone(&catalog), settings.auto_commit_duration, Arc::clone(&bulk_lock)); 78 | let addr: IpAddr = settings 79 | .host 80 | .parse() 81 | .unwrap_or_else(|_| panic!("Invalid ip address: {}", &settings.host)); 82 | let bind: SocketAddr = SocketAddr::new(addr, settings.port); 83 | 84 | println!("{}", HEADER); 85 | 86 | tokio::spawn(commit_watcher); 87 | let watcher_clone = Arc::clone(&bulk_lock); 88 | let router = Router::from_settings(catalog, watcher_clone, settings); 89 | Box::pin(router.router_with_catalog(bind)) 90 | } 91 | -------------------------------------------------------------------------------- /toshi-types/src/error.rs: -------------------------------------------------------------------------------- 1 | //! This contains Toshi's server errors, if you are looking for the hyper or tonic errors they are 2 | //! located in the [`crate::extra_errors`]: extra_errors 3 | //! 
module 4 | 5 | use std::fmt::Debug; 6 | 7 | use serde::{Deserialize, Serialize}; 8 | use tantivy::directory::error::OpenDirectoryError; 9 | use tantivy::query::QueryParserError; 10 | use tantivy::schema::DocParsingError; 11 | use tantivy::TantivyError; 12 | use thiserror::Error; 13 | 14 | /// The type returned when an error occurs with a query 15 | #[derive(Debug, Serialize, Deserialize)] 16 | pub struct ErrorResponse { 17 | /// The human-readable message given back 18 | pub message: String, 19 | } 20 | 21 | impl ErrorResponse { 22 | /// Create an error response from anything that implements ToString 23 | pub fn new(message: M) -> Self { 24 | Self { 25 | message: message.to_string(), 26 | } 27 | } 28 | } 29 | 30 | /// Toshi's base error types 31 | #[derive(Debug, Error)] 32 | pub enum Error { 33 | /// IO error that deals with anything related to reading from disk or network communications 34 | #[error("IO Error: {0}")] 35 | IOError(#[from] std::io::Error), 36 | /// Unlikely error related to Slog 37 | #[error("IO Error: {0}")] 38 | SlogError(#[from] slog::Error), 39 | /// A query tried to reference a field that does not exist 40 | #[error("Unknown Field: '{0}' queried")] 41 | UnknownIndexField(String), 42 | /// A query tried to query an index that does not exist 43 | #[error("Unknown Index: '{0}' does not exist")] 44 | UnknownIndex(String), 45 | /// A query that had a syntax error or was otherwise not valid 46 | #[error("Error in query execution: '{0}'")] 47 | QueryError(String), 48 | /// This should never occur and is a bug that should be reported 49 | #[error("Failed to find known executor")] 50 | SpawnError, 51 | /// oOoOOoOOOoOOo Spooooky ghosts, maybe, we don't know. 
52 | #[error("An unknown error occurred")] 53 | UnknownError, 54 | /// This should never occur and is a bug that should be reported 55 | #[error("Thread pool is poisoned")] 56 | PoisonedError, 57 | /// An error occured in Toshi's internal RPC communications 58 | #[error("An RPC error occurred: '{0}'")] 59 | RPCError(String), 60 | /// Any Error related to Tantivy 61 | #[error("Error in Index: '{0}'")] 62 | TantivyError(#[from] anyhow::Error), 63 | /// Any error related to serde_json 64 | #[error("Error Parsing Json: '{0}'")] 65 | JsonParsing(#[from] serde_json::Error), 66 | /// Any error related to Hyper 67 | #[error("Http Error: '{0}'")] 68 | HyperError(#[from] hyper::Error), 69 | /// Any error related to http 70 | #[error("Http Crate Error: '{0}'")] 71 | HttpError(#[from] http::Error), 72 | /// When attempting to create an index that already exists 73 | #[error("Index: '{0}' already exists")] 74 | AlreadyExists(String), 75 | /// When an invalid log config is provided 76 | #[error("Error Deserializing Error: '{0}'")] 77 | TomlError(toml::de::Error), 78 | } 79 | 80 | impl From for Error { 81 | fn from(err: OpenDirectoryError) -> Self { 82 | Error::TantivyError(err.into()) 83 | } 84 | } 85 | 86 | impl From for Error { 87 | fn from(err: QueryParserError) -> Self { 88 | Error::TantivyError(err.into()) 89 | } 90 | } 91 | 92 | impl From for Error { 93 | fn from(err: DocParsingError) -> Self { 94 | Error::TantivyError(err.into()) 95 | } 96 | } 97 | 98 | impl From for Error { 99 | fn from(err: TantivyError) -> Self { 100 | Error::TantivyError(err.into()) 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /toshi-server/src/router.rs: -------------------------------------------------------------------------------- 1 | use std::convert::Infallible; 2 | use std::net::{SocketAddr, TcpListener}; 3 | use std::sync::atomic::AtomicBool; 4 | use std::sync::Arc; 5 | 6 | use hyper::service::{make_service_fn, service_fn}; 7 | use 
hyper::{Body, Method, Request, Response, Server}; 8 | 9 | use log::*; 10 | use tower_util::BoxService; 11 | 12 | use toshi_types::{Catalog, QueryOptions}; 13 | 14 | use crate::handlers::*; 15 | use crate::settings::Settings; 16 | use crate::utils::{not_found, parse_path}; 17 | 18 | pub type BoxedFn = BoxService, Response, hyper::Error>; 19 | 20 | #[derive(Clone)] 21 | pub struct Router { 22 | pub cat: Arc, 23 | pub watcher: Arc, 24 | pub settings: Settings, 25 | } 26 | 27 | impl Router { 28 | pub fn new(cat: Arc, watcher: Arc) -> Self { 29 | Self::from_settings(cat, watcher, Settings::default()) 30 | } 31 | 32 | pub fn from_settings(cat: Arc, watcher: Arc, settings: Settings) -> Self { 33 | Self { cat, watcher, settings } 34 | } 35 | 36 | pub async fn route( 37 | catalog: Arc, 38 | watcher: Arc, 39 | req: Request, 40 | settings: Settings, 41 | ) -> Result, hyper::Error> { 42 | let (parts, body) = req.into_parts(); 43 | let query_options: QueryOptions = parts 44 | .uri 45 | .query() 46 | .and_then(|q| serde_urlencoded::from_str(q).ok()) 47 | .unwrap_or_default(); 48 | 49 | let method = parts.method; 50 | let path = parse_path(parts.uri.path()); 51 | 52 | match (&method, &path[..]) { 53 | (m, ["_list"]) if m == Method::GET => list_indexes(catalog).await, 54 | (m, [idx, "_create"]) if m == Method::PUT => create_index(catalog, body, idx).await, 55 | (m, [idx, "_summary"]) if m == Method::GET => index_summary(catalog, idx, query_options).await, 56 | (m, [idx, "_flush"]) if m == Method::GET => flush(catalog, idx).await, 57 | (m, [idx, "_bulk"]) if m == Method::POST => { 58 | let w = Arc::clone(&watcher); 59 | bulk_insert(catalog, w, body, idx, settings.json_parsing_threads, settings.max_line_length).await 60 | } 61 | (m, [idx]) if m == Method::POST => doc_search(catalog, body, idx).await, 62 | (m, [idx]) if m == Method::PUT => add_document(catalog, body, idx).await, 63 | (m, [idx]) if m == Method::DELETE => delete_term(catalog, body, idx).await, 64 | (m, [idx]) if m == 
Method::GET => { 65 | if idx == &"favicon.ico" { 66 | not_found().await 67 | } else { 68 | all_docs(catalog, idx).await 69 | } 70 | } 71 | (m, []) if m == Method::GET => root().await, 72 | _ => not_found().await, 73 | } 74 | } 75 | 76 | pub async fn service_call(catalog: Arc, watcher: Arc, settings: Settings) -> Result { 77 | Ok(BoxService::new(service_fn(move |req| { 78 | info!("REQ = {:?}", &req); 79 | Self::route(Arc::clone(&catalog), Arc::clone(&watcher), req, settings.clone()) 80 | }))) 81 | } 82 | 83 | pub async fn router_with_catalog(self, addr: SocketAddr) -> Result<(), hyper::Error> { 84 | let routes = make_service_fn(move |_| Self::service_call(Arc::clone(&self.cat), Arc::clone(&self.watcher), self.settings.clone())); 85 | let server = Server::bind(&addr).serve(routes); 86 | if let Err(err) = server.await { 87 | trace!("server error: {}", err); 88 | } 89 | Ok(()) 90 | } 91 | 92 | #[allow(dead_code)] 93 | pub(crate) async fn router_from_tcp(self, listener: TcpListener) -> Result<(), hyper::Error> { 94 | let routes = make_service_fn(move |_| Self::service_call(Arc::clone(&self.cat), Arc::clone(&self.watcher), self.settings.clone())); 95 | let server = Server::from_tcp(listener)?.serve(routes); 96 | if let Err(err) = server.await { 97 | trace!("server error: {}", err); 98 | } 99 | Ok(()) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /toshi-types/src/client.rs: -------------------------------------------------------------------------------- 1 | use std::iter::Sum; 2 | use std::ops::Add; 3 | 4 | use serde::{Deserialize, Serialize}; 5 | use tantivy::space_usage::SearcherSpaceUsage; 6 | use tantivy::IndexMeta; 7 | 8 | use crate::query::KeyValue; 9 | 10 | /// A single document returned from a Tantivy Index 11 | #[derive(Serialize, Deserialize, Debug, Clone)] 12 | pub struct ScoredDoc { 13 | /// The document's relevancy score 14 | pub score: Option, 15 | /// The actual document 16 | pub doc: D, 17 | } 18 | 
19 | impl ScoredDoc { 20 | /// Constructor for a new ScoredDoc 21 | pub fn new(score: Option, doc: D) -> Self { 22 | Self { score, doc } 23 | } 24 | } 25 | 26 | /// The Search response object from Toshi 27 | #[derive(Serialize, Deserialize, Debug, Clone)] 28 | pub struct SearchResults { 29 | /// The number of documents returned 30 | pub hits: usize, 31 | /// The actual documents, see [`ScoredDoc`]: ScoredDoc 32 | docs: Vec>, 33 | /// The, if any, facets returned 34 | facets: Vec>, 35 | } 36 | 37 | impl Add for SearchResults { 38 | type Output = SearchResults; 39 | 40 | fn add(self, mut rhs: SearchResults) -> Self::Output { 41 | let mut docs = self.docs; 42 | let mut facets = self.facets; 43 | let hits = self.hits + rhs.hits; 44 | facets.append(&mut rhs.facets); 45 | docs.append(&mut rhs.get_docs().to_vec()); 46 | 47 | Self { hits, docs, facets } 48 | } 49 | } 50 | 51 | impl Sum for SearchResults { 52 | fn sum>>(iter: I) -> Self { 53 | iter.fold(Self::new(Vec::new()), |r, sr| r + sr) 54 | } 55 | } 56 | 57 | impl SearchResults { 58 | /// Getter for returned documents 59 | pub fn get_docs(&self) -> &[ScoredDoc] { 60 | &self.docs 61 | } 62 | /// Getter for the returned facets 63 | pub fn get_facets(&self) -> &[KeyValue] { 64 | &self.facets 65 | } 66 | 67 | /// Constructor for just documents 68 | pub fn new(docs: Vec>) -> Self { 69 | Self { 70 | hits: docs.len(), 71 | docs, 72 | facets: Vec::new(), 73 | } 74 | } 75 | 76 | /// Constructor for documents with facets 77 | pub fn with_facets(docs: Vec>, facets: Vec>) -> Self { 78 | Self { 79 | hits: docs.len(), 80 | docs, 81 | facets, 82 | } 83 | } 84 | } 85 | 86 | /// A response gotten from the _summary route for an index 87 | #[derive(Debug, Serialize)] 88 | pub struct SummaryResponse { 89 | summaries: IndexMeta, 90 | #[serde(skip_serializing_if = "Option::is_none")] 91 | segment_sizes: Option, 92 | } 93 | 94 | impl SummaryResponse { 95 | /// Constructor for a new summary response 96 | pub fn new(summaries: IndexMeta, 
segment_sizes: Option) -> Self { 97 | Self { summaries, segment_sizes } 98 | } 99 | } 100 | 101 | #[cfg(test)] 102 | mod tests { 103 | use crate::{ScoredDoc, SearchResults}; 104 | use std::collections::BTreeMap; 105 | 106 | #[test] 107 | fn test_add() { 108 | let scored = ScoredDoc::new(Some(1.0), BTreeMap::::new()); 109 | let scored2 = ScoredDoc::new(Some(0.5), BTreeMap::::new()); 110 | let results = SearchResults::new(vec![scored]); 111 | let results2 = SearchResults::new(vec![scored2]); 112 | let both = results + results2; 113 | 114 | assert_eq!(both.docs.len(), 2); 115 | assert_eq!(both.hits, 2); 116 | } 117 | 118 | #[test] 119 | fn test_sum() { 120 | let scored = ScoredDoc::new(Some(1.0), BTreeMap::::new()); 121 | let scored2 = ScoredDoc::new(Some(0.5), BTreeMap::::new()); 122 | let results = SearchResults::new(vec![scored]); 123 | let results2 = SearchResults::new(vec![scored2]); 124 | let both: SearchResults> = vec![results2, results].into_iter().sum(); 125 | 126 | assert_eq!(both.docs.len(), 2); 127 | assert_eq!(both.hits, 2); 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /toshi-raft/src/handle.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use raft::prelude::*; 4 | use raft::Result; 5 | use serde_json::Value; 6 | use tantivy::space_usage::SearcherSpaceUsage; 7 | use tantivy::{Index, IndexWriter}; 8 | use tokio::runtime::Runtime; 9 | use tokio::sync::mpsc::Sender; 10 | use tokio::sync::Mutex; 11 | 12 | use toshi_types::Result as ToshiResult; 13 | use toshi_types::*; 14 | 15 | use crate::proposal::Proposal; 16 | 17 | #[derive(Clone, Debug)] 18 | pub struct RaftHandle 19 | where 20 | T: IndexHandle + Send + Sync, 21 | { 22 | handle: Arc, 23 | rt: Arc, 24 | raft_state: Option, 25 | prop_chan: Arc>, 26 | } 27 | 28 | impl RaftHandle { 29 | pub fn new(handle: T, prop_chan: Arc>) -> Self { 30 | Self { 31 | handle: Arc::new(handle), 32 | 
rt: Arc::new(Runtime::new().unwrap()), 33 | raft_state: None, 34 | prop_chan, 35 | } 36 | } 37 | } 38 | 39 | impl Storage for RaftHandle 40 | where 41 | T: IndexHandle + Send + Sync, 42 | { 43 | fn initial_state(&self) -> Result { 44 | if let Some(ref rs) = self.raft_state { 45 | Ok(rs.clone()) 46 | } else { 47 | let rs = RaftState::new(HardState::default(), ConfState::default()); 48 | Ok(rs) 49 | } 50 | } 51 | 52 | fn entries(&self, low: u64, high: u64, max_size: impl Into>) -> Result> { 53 | let range = RangeQuery::builder().for_field("_id").gte(low).lte(high).build(); 54 | let diff = high - low; 55 | let max = max_size.into().unwrap_or(diff) as usize; 56 | let search = Search::builder().with_query(range).with_limit(max).build(); 57 | let result: Vec = self 58 | .rt 59 | .block_on(Box::pin(self.search_index(search))) 60 | .unwrap_or_else(|_| panic!("Getting Documents for {}", self.get_name())) 61 | .get_docs() 62 | .iter() 63 | .flat_map(|doc| serde_json::to_string(&doc)) 64 | .map(|doc| { 65 | let mut ent = Entry::new_(); 66 | ent.set_data(doc.into_bytes()); 67 | ent 68 | }) 69 | .collect(); 70 | 71 | Ok(result) 72 | } 73 | 74 | fn term(&self, _idx: u64) -> Result { 75 | Ok(self.raft_state.as_ref().map(|rs| rs.hard_state.term).unwrap_or(_idx)) 76 | } 77 | 78 | fn first_index(&self) -> Result { 79 | Ok(1) 80 | } 81 | 82 | fn last_index(&self) -> Result { 83 | Ok(self.handle.get_opstamp() as u64) 84 | } 85 | 86 | fn snapshot(&self, _request_index: u64) -> Result { 87 | unimplemented!() 88 | } 89 | } 90 | 91 | #[async_trait::async_trait] 92 | impl IndexHandle for RaftHandle 93 | where 94 | T: IndexHandle + Send + Sync, 95 | { 96 | fn get_name(&self) -> String { 97 | self.handle.get_name() 98 | } 99 | 100 | fn index_location(&self) -> IndexLocation { 101 | self.handle.index_location() 102 | } 103 | 104 | fn get_index(&self) -> Index { 105 | self.handle.get_index() 106 | } 107 | 108 | fn get_writer(&self) -> Arc> { 109 | self.handle.get_writer() 110 | } 111 | 112 | 
fn get_space(&self) -> SearcherSpaceUsage { 113 | self.handle.get_space() 114 | } 115 | 116 | fn get_opstamp(&self) -> usize { 117 | self.handle.get_opstamp() 118 | } 119 | 120 | fn set_opstamp(&self, opstamp: usize) { 121 | self.handle.set_opstamp(opstamp); 122 | } 123 | 124 | async fn commit(&self) -> std::result::Result { 125 | self.commit().await 126 | } 127 | 128 | async fn search_index(&self, search: Search) -> ToshiResult> { 129 | self.handle.search_index(search).await 130 | } 131 | 132 | async fn add_document(&self, doc: AddDocument) -> ToshiResult<()> { 133 | // self.prop_chan.send() 134 | self.handle.add_document(doc).await 135 | } 136 | 137 | async fn delete_term(&self, term: DeleteDoc) -> ToshiResult { 138 | self.handle.delete_term(term).await 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /toshi-types/src/query/phrase.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use tantivy::query::{PhraseQuery as TantivyPhraseQuery, Query}; 3 | use tantivy::schema::Schema; 4 | use tantivy::Term; 5 | 6 | use crate::query::{make_field_value, CreateQuery, KeyValue}; 7 | use crate::{error::Error, Result}; 8 | 9 | /// A query for a phrase of terms, see [`tantivy::query::PhraseQuery`] for more info on what 10 | /// can be included here 11 | #[derive(Serialize, Deserialize, Debug, Clone)] 12 | pub struct PhraseQuery { 13 | phrase: KeyValue, 14 | } 15 | 16 | impl PhraseQuery { 17 | /// Constructor to create a phrase query from a known key value 18 | pub fn new(phrase: KeyValue) -> Self { 19 | PhraseQuery { phrase } 20 | } 21 | /// Constructor to create the key value for the user 22 | pub fn with_phrase(key: String, value: TermPair) -> Self { 23 | PhraseQuery { 24 | phrase: KeyValue::new(key, value), 25 | } 26 | } 27 | } 28 | 29 | /// The tokens used in the phrase query 30 | #[derive(Serialize, Deserialize, Debug, Clone)] 31 | pub struct 
TermPair { 32 | terms: Vec, 33 | #[serde(skip_serializing_if = "Option::is_none")] 34 | offsets: Option>, 35 | } 36 | 37 | impl TermPair { 38 | /// Constructor for creating a term pair 39 | pub fn new(terms: Vec, offsets: Option>) -> Self { 40 | TermPair { terms, offsets } 41 | } 42 | } 43 | 44 | impl CreateQuery for PhraseQuery { 45 | fn create_query(self, schema: &Schema) -> Result> { 46 | let KeyValue { field, value } = self.phrase; 47 | if value.terms.len() <= 1 { 48 | return Err(Error::QueryError("Phrase Query must have more than 1 term".into())); 49 | } 50 | if let Some(offsets) = &value.offsets { 51 | if value.terms.len() != offsets.len() { 52 | return Err(Error::QueryError(format!( 53 | "Differing numbers of offsets and query terms ({} and {})", 54 | value.terms.len(), 55 | offsets.len() 56 | ))); 57 | } 58 | let paired_terms = value 59 | .terms 60 | .iter() 61 | .zip(offsets) 62 | .map(|(t, o)| match make_field_value(schema, &field, t) { 63 | Ok(f) => Ok((*o, f)), 64 | Err(e) => Err(e), 65 | }) 66 | .collect::>>()?; 67 | Ok(Box::new(TantivyPhraseQuery::new_with_offset(paired_terms))) 68 | } else { 69 | let terms = value 70 | .terms 71 | .into_iter() 72 | .map(|t| make_field_value(schema, &field, &t)) 73 | .collect::>>()?; 74 | Ok(Box::new(TantivyPhraseQuery::new(terms))) 75 | } 76 | } 77 | } 78 | 79 | #[cfg(test)] 80 | mod tests { 81 | use super::*; 82 | use tantivy::schema::*; 83 | 84 | #[test] 85 | fn test_no_terms() { 86 | let body = r#"{ "phrase": { "test_u64": { "terms": [ ] } } }"#; 87 | let mut schema = SchemaBuilder::new(); 88 | schema.add_u64_field("test_u64", FAST); 89 | let built = schema.build(); 90 | let query = serde_json::from_str::(body).unwrap().create_query(&built); 91 | 92 | assert!(query.is_err()); 93 | assert_eq!( 94 | query.unwrap_err().to_string(), 95 | "Error in query execution: 'Phrase Query must have more than 1 term'" 96 | ); 97 | } 98 | 99 | #[test] 100 | fn test_diff_terms_offsets() { 101 | let body = r#"{ "phrase": { 
"test_u64": { "terms": ["asdf", "asdf2"], "offsets": [1] } } }"#; 102 | let mut schema = SchemaBuilder::new(); 103 | schema.add_u64_field("test_u64", FAST); 104 | let built = schema.build(); 105 | let phrase: PhraseQuery = serde_json::from_str(body).unwrap(); 106 | let query = phrase.create_query(&built); 107 | 108 | assert!(query.is_err()); 109 | assert_eq!( 110 | query.unwrap_err().to_string(), 111 | "Error in query execution: 'Differing numbers of offsets and query terms (2 and 1)'" 112 | ); 113 | } 114 | 115 | #[test] 116 | fn test_query() { 117 | let body = r#"{ "phrase": { "test_u64": { "terms": ["asdf", "asdf2"], "offsets": [1, 2] } } }"#; 118 | let mut schema = SchemaBuilder::new(); 119 | schema.add_u64_field("test_u64", FAST); 120 | let built = schema.build(); 121 | let phrase: PhraseQuery = serde_json::from_str(body).unwrap(); 122 | let query = phrase.create_query(&built); 123 | 124 | assert!(query.is_ok()); 125 | let result = query.unwrap(); 126 | let q: &TantivyPhraseQuery = result.downcast_ref::().unwrap(); 127 | assert_eq!(q.phrase_terms().len(), 2); 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /toshi-server/src/commit.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{AtomicBool, Ordering}; 2 | use std::sync::Arc; 3 | use std::time::Duration; 4 | 5 | use log::trace; 6 | use tokio::time; 7 | 8 | use toshi_types::{Catalog, IndexHandle}; 9 | 10 | #[allow(irrefutable_let_patterns)] 11 | pub async fn watcher(cat: Arc, commit_duration: f32, lock: Arc) -> Result<(), ()> { 12 | while let _ = time::interval(Duration::from_secs_f32(commit_duration)).tick().await { 13 | for e in cat.get_collection().iter() { 14 | let (k, v) = e.pair(); 15 | let writer = v.get_writer(); 16 | let current_ops = v.get_opstamp(); 17 | if current_ops == 0 { 18 | trace!("No update to index={}, opstamp={}", k, current_ops); 19 | } else if !lock.load(Ordering::SeqCst) 
{ 20 | let mut w = writer.lock().await; 21 | trace!("Committing: {}...", k); 22 | w.commit().unwrap(); 23 | v.set_opstamp(0); 24 | } 25 | } 26 | } 27 | Ok(()) 28 | } 29 | 30 | #[cfg(test)] 31 | pub mod tests { 32 | use hyper::Body; 33 | 34 | use crate::handlers::{add_document, all_docs}; 35 | use crate::index::create_test_catalog; 36 | use crate::SearchResults; 37 | 38 | use super::*; 39 | use http::Response; 40 | use serde::de::DeserializeOwned; 41 | use tantivy::schema::*; 42 | use tantivy::{doc, Index}; 43 | 44 | pub fn create_test_index() -> Index { 45 | let mut builder = SchemaBuilder::new(); 46 | let test_text = builder.add_text_field("test_text", STORED | TEXT); 47 | let test_int = builder.add_i64_field("test_i64", STORED | INDEXED); 48 | let test_unsign = builder.add_u64_field("test_u64", STORED | INDEXED); 49 | let test_unindexed = builder.add_text_field("test_unindex", STORED); 50 | let test_facet = builder.add_facet_field("test_facet", INDEXED | STORED); 51 | 52 | let schema = builder.build(); 53 | let idx = Index::create_in_ram(schema); 54 | let mut writer = idx.writer(30_000_000).unwrap(); 55 | 56 | writer.add_document(doc! { test_text => "Test Document 1", test_int => 2014i64, test_unsign => 10u64, test_unindexed => "no", test_facet => Facet::from("/cat/cat2") }).unwrap(); 57 | writer.add_document(doc! { test_text => "Test Dockument 2", test_int => -2015i64, test_unsign => 11u64, test_unindexed => "yes", test_facet => Facet::from("/cat/cat2") }).unwrap(); 58 | writer.add_document(doc! { test_text => "Test Duckiment 3", test_int => 2016i64, test_unsign => 12u64, test_unindexed => "noo", test_facet => Facet::from("/cat/cat3") }).unwrap(); 59 | writer.add_document(doc! { test_text => "Test Document 4", test_int => -2017i64, test_unsign => 13u64, test_unindexed => "yess", test_facet => Facet::from("/cat/cat4") }).unwrap(); 60 | writer.add_document(doc! 
{ test_text => "Test Document 5", test_int => 2018i64, test_unsign => 14u64, test_unindexed => "nooo", test_facet => Facet::from("/dog/cat2") }).unwrap(); 61 | writer.commit().unwrap(); 62 | 63 | idx 64 | } 65 | 66 | pub async fn wait_json(r: Response) -> T { 67 | let bytes = read_body(r).await.unwrap(); 68 | serde_json::from_slice::(bytes.as_bytes()).unwrap_or_else(|e| panic!("Could not deserialize JSON: {:?}", e)) 69 | } 70 | 71 | pub fn cmp_float(a: f32, b: f32) -> bool { 72 | let abs_a = a.abs(); 73 | let abs_b = b.abs(); 74 | let diff = (a - b).abs(); 75 | if diff == 0.0 { 76 | return true; 77 | } else if a == 0.0 || b == 0.0 || (abs_a + abs_b < f32::MIN_POSITIVE) { 78 | return diff < (f32::EPSILON * f32::MIN_POSITIVE); 79 | } 80 | diff / (abs_a + abs_b).min(f32::MAX) < f32::EPSILON 81 | } 82 | 83 | pub async fn read_body(resp: Response) -> Result> { 84 | let b = hyper::body::to_bytes(resp.into_body()).await?; 85 | Ok(String::from_utf8(b.to_vec())?) 86 | } 87 | 88 | #[tokio::test] 89 | pub async fn test_auto_commit() { 90 | let catalog = create_test_catalog("test_index"); 91 | let lock = Arc::new(AtomicBool::new(false)); 92 | let watcher = watcher(Arc::clone(&catalog), 0.1, Arc::clone(&lock)); 93 | 94 | tokio::spawn(watcher); 95 | 96 | let body = r#"{"document": { "test_text": "Babbaboo!", "test_u64": 10 , "test_i64": -10, "test_unindex": "asdf1234" } }"#; 97 | 98 | add_document(Arc::clone(&catalog), Body::from(body), "test_index").await.unwrap(); 99 | 100 | let expected = 6; 101 | for _ in 0..2 { 102 | let req = all_docs(Arc::clone(&catalog), "test_index").await.unwrap(); 103 | let body = read_body(req).await.unwrap(); 104 | let docs: SearchResults = serde_json::from_slice(body.as_bytes()).unwrap(); 105 | if docs.hits == expected { 106 | break; 107 | } 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /toshi-types/src/lib.rs: 
-------------------------------------------------------------------------------- 1 | #![warn(clippy::all)] 2 | //! Toshi-Types 3 | //! These are the high level types available in the Toshi search engine. 4 | //! The client for Toshi as well as Toshi itself is built on top of these types. If you are 5 | //! looking for Toshi's protobuf types then you will want to look in the toshi-proto module 6 | //! of Toshi's source code. 7 | 8 | use std::sync::atomic::AtomicBool; 9 | use std::sync::Arc; 10 | 11 | use hyper::{Body, Response}; 12 | use serde_json::Value as SerdeValue; 13 | use tantivy::schema::Schema; 14 | use tantivy::space_usage::SearcherSpaceUsage; 15 | use tantivy::{Index, IndexWriter}; 16 | use tokio::sync::Mutex; 17 | 18 | pub use client::{ScoredDoc, SearchResults, SummaryResponse}; 19 | pub use error::{Error, ErrorResponse}; 20 | pub use query::{ 21 | boolean::BoolQuery, facet::FacetQuery, fuzzy::FuzzyQuery, fuzzy::FuzzyTerm, phrase::PhraseQuery, phrase::TermPair, range::RangeQuery, 22 | range::Ranges, regex::RegexQuery, term::ExactTerm, CreateQuery, FlatNamedDocument, KeyValue, Query, QueryOptions, Search, 23 | }; 24 | pub use server::*; 25 | 26 | /// Toshi client result type 27 | pub type Result = std::result::Result; 28 | 29 | /// Types related to the response Toshi gives back to requests 30 | mod client; 31 | 32 | /// Errors associated with Toshi's responses 33 | mod error; 34 | 35 | /// Types related to Toshi's Query DSL 36 | mod query; 37 | 38 | /// Types related to the POST bodies that Toshi accepts for requests 39 | mod server; 40 | 41 | /// Extra error conversions Toshi uses, if users want they can omit this feature to not pull in 42 | /// hyper and tonic dependencies 43 | #[cfg(feature = "extra-errors")] 44 | mod extra_errors; 45 | 46 | /// Defines an interface on how operations are done on indexes inside Toshi 47 | #[async_trait::async_trait] 48 | pub trait IndexHandle: Clone { 49 | /// The human-readable name of the index 50 | fn get_name(&self) 
-> String; 51 | /// Return the underlying index 52 | fn get_index(&self) -> Index; 53 | /// Return index writer 54 | fn get_writer(&self) -> Arc>; 55 | /// Get size of an index 56 | fn get_space(&self) -> SearcherSpaceUsage; 57 | /// The agreed upon raft commit ID this index is currently at. 58 | fn get_opstamp(&self) -> usize; 59 | /// Set that opstamp 60 | fn set_opstamp(&self, opstamp: usize); 61 | /// Commit the current index writes 62 | async fn commit(&self) -> Result; 63 | /// Search for documents in this index 64 | async fn search_index(&self, search: Search) -> Result>; 65 | /// Add documents to this index 66 | async fn add_document(&self, doc: AddDocument) -> Result<()>; 67 | /// Delete terms/documents from this index 68 | async fn delete_term(&self, term: DeleteDoc) -> Result; 69 | } 70 | 71 | /// Defines the interface for obtaining a handle from a catalog to an index 72 | #[async_trait::async_trait] 73 | pub trait Catalog: Send + Sync + 'static { 74 | /// The type of handle the catalog returns when the index is local 75 | type Handle: IndexHandle + Send + Sync; 76 | 77 | /// The base path for local indexes, useless for remote 78 | fn base_path(&self) -> String; 79 | /// Return the entire collection of handles 80 | fn get_collection(&self) -> &dashmap::DashMap; 81 | /// Add a local index to the catalog 82 | async fn add_index(&self, name: &str, schema: Schema) -> Result<()>; 83 | /// Return a list of index names 84 | async fn list_indexes(&self) -> Vec; 85 | /// Return a handle to a single index 86 | fn get_index(&self, name: &str) -> Result; 87 | /// Determine if an index exists locally 88 | fn exists(&self, index: &str) -> bool; 89 | } 90 | 91 | #[allow(missing_docs)] 92 | #[async_trait::async_trait] 93 | pub trait Serve 94 | where 95 | C: crate::Catalog, 96 | { 97 | async fn list_indexes(&self, catalog: Arc) -> std::result::Result, hyper::Error>; 98 | 99 | async fn create_index(catalog: Arc, body: Body, idx: &str) -> std::result::Result, 
hyper::Error>; 100 | 101 | async fn index_summary(catalog: Arc, idx: &str, options: QueryOptions) -> std::result::Result, hyper::Error>; 102 | 103 | async fn flush(catalog: Arc, idx: &str) -> std::result::Result, hyper::Error>; 104 | 105 | async fn bulk_insert( 106 | catalog: Arc, 107 | watcher: Arc, 108 | mut body: Body, 109 | index: &str, 110 | num_threads: usize, 111 | ) -> std::result::Result, hyper::Error>; 112 | 113 | async fn doc_search(catalog: Arc, body: Body, idx: &str) -> std::result::Result, hyper::Error>; 114 | 115 | async fn add_document(catalog: Arc, body: Body, idx: &str) -> std::result::Result, hyper::Error>; 116 | 117 | async fn delete_term(catalog: Arc, body: Body, idx: &str) -> std::result::Result, hyper::Error>; 118 | 119 | async fn all_docs(catalog: Arc, idx: &str) -> std::result::Result, hyper::Error>; 120 | } 121 | -------------------------------------------------------------------------------- /toshi-client/src/hyper_client.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | use async_trait::async_trait; 4 | use http::Response; 5 | use hyper::client::connect::Connect; 6 | use hyper::client::HttpConnector; 7 | use hyper::{Body, Client, Request, Uri}; 8 | use serde::{de::DeserializeOwned, Serialize}; 9 | use tantivy::schema::Schema; 10 | 11 | use toshi_types::*; 12 | 13 | use crate::Result; 14 | 15 | #[derive(Debug, Clone)] 16 | pub struct HyperToshi 17 | where 18 | C: Connect + Clone + Send + Sync + 'static, 19 | { 20 | host: String, 21 | client: Client, 22 | } 23 | 24 | impl HyperToshi { 25 | pub fn new(host: H) -> Self { 26 | let client = Client::new(); 27 | Self::with_client(host, client) 28 | } 29 | } 30 | 31 | #[cfg(feature = "rust_tls")] 32 | #[cfg(not(feature = "hyper_tls"))] 33 | impl HyperToshi> { 34 | pub fn with_tls(host: H, connector: hyper_rustls::HttpsConnector) -> Self { 35 | let client = Client::builder().build(connector); 36 | Self::with_client(host, client) 
37 | } 38 | } 39 | 40 | #[cfg(feature = "hyper_tls")] 41 | #[cfg(not(feature = "rust_tls"))] 42 | impl HyperToshi> { 43 | pub fn with_tls(host: H, connector: hyper_tls::HttpsConnector) -> Self { 44 | let client = Client::builder().build(connector); 45 | Self::with_client(host, client) 46 | } 47 | } 48 | 49 | impl HyperToshi 50 | where 51 | C: Connect + Clone + Send + Sync + 'static, 52 | { 53 | pub fn with_client(host: H, client: Client) -> Self { 54 | Self { 55 | host: host.to_string(), 56 | client, 57 | } 58 | } 59 | 60 | #[inline] 61 | fn uri(&self, index: I) -> String 62 | where 63 | I: ToString, 64 | { 65 | format!("{}/{}", self.host, index.to_string()) 66 | } 67 | 68 | #[inline] 69 | async fn make_request(&self, request: Request) -> Result 70 | where 71 | R: DeserializeOwned + Send + Sync, 72 | { 73 | let response = self.client.request(request).await?; 74 | let body_bytes = hyper::body::to_bytes(response.into_body()).await?; 75 | serde_json::from_slice::(&body_bytes).map_err(Into::into) 76 | } 77 | } 78 | 79 | #[async_trait] 80 | impl crate::AsyncClient for HyperToshi 81 | where 82 | C: Connect + Clone + Send + Sync + 'static, 83 | { 84 | type Body = hyper::Body; 85 | 86 | async fn index(&self) -> Result> { 87 | let request = Request::get(&self.host).body(Body::empty())?; 88 | self.client.request(request).await.map_err(Into::into) 89 | } 90 | 91 | async fn list(&self) -> Result> { 92 | let uri: Uri = self.uri("_list").parse()?; 93 | self.client.get(uri).await.map_err(Into::into) 94 | } 95 | 96 | async fn index_summary(&self, index: I, include_sizes: bool) -> Result> 97 | where 98 | I: ToString + Send + Sync + Display, 99 | { 100 | let uri = self.uri(format!("{}/_summary?include_sizes={}", index, include_sizes)); 101 | let parsed_uri = uri.parse::()?; 102 | self.client.get(parsed_uri).await.map_err(Into::into) 103 | } 104 | 105 | async fn create_index(&self, name: I, schema: Schema) -> Result> 106 | where 107 | I: ToString + Send + Sync + Display, 108 | { 109 
| let uri = self.uri(format!("{}/_create", name)); 110 | let body = serde_json::to_vec(&SchemaBody(schema))?; 111 | let request = Request::put(uri).body(Body::from(body))?; 112 | self.client.request(request).await.map_err(Into::into) 113 | } 114 | 115 | async fn add_document(&self, index: I, document: D, options: Option) -> Result> 116 | where 117 | I: ToString + Send + Sync + Display, 118 | D: Serialize + Send + Sync, 119 | { 120 | let uri = self.uri(index); 121 | let body = serde_json::to_vec(&AddDocument { options, document })?; 122 | let request = Request::put(uri).body(Body::from(body))?; 123 | self.client.request(request).await.map_err(Into::into) 124 | } 125 | 126 | async fn search(&self, index: I, search: Search) -> Result> 127 | where 128 | I: ToString + Send + Sync + Display, 129 | D: DeserializeOwned + Clone + Send + Sync, 130 | { 131 | let uri = self.uri(index); 132 | let body = serde_json::to_vec(&search)?; 133 | let request = Request::post(uri).body(Body::from(body))?; 134 | self.make_request::>(request).await 135 | } 136 | 137 | async fn all_docs(&self, index: I) -> Result> 138 | where 139 | I: ToString + Send + Sync + Display, 140 | D: DeserializeOwned + Clone + Send + Sync, 141 | { 142 | let uri = self.uri(index); 143 | let request = Request::get(uri).body(Body::empty())?; 144 | self.make_request::>(request).await 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /toshi-server/src/index.rs: -------------------------------------------------------------------------------- 1 | use std::clone::Clone; 2 | use std::fs; 3 | use std::path::{PathBuf, MAIN_SEPARATOR}; 4 | 5 | use dashmap::DashMap; 6 | use tantivy::schema::Schema; 7 | use tantivy::Index; 8 | 9 | use toshi_types::{Catalog, Error}; 10 | 11 | use crate::handle::LocalIndex; 12 | use crate::settings::Settings; 13 | use crate::Result; 14 | 15 | pub struct IndexCatalog { 16 | settings: Settings, 17 | base_path: PathBuf, 18 | local_handles: 
DashMap, 19 | } 20 | 21 | impl IndexCatalog { 22 | pub fn get_settings(&self) -> &Settings { 23 | &self.settings 24 | } 25 | } 26 | 27 | #[async_trait::async_trait] 28 | impl Catalog for IndexCatalog { 29 | type Handle = LocalIndex; 30 | 31 | fn base_path(&self) -> String { 32 | format!("{}", self.base_path.display()) 33 | } 34 | 35 | fn get_collection(&self) -> &DashMap { 36 | &self.local_handles 37 | } 38 | 39 | async fn add_index(&self, name: &str, schema: Schema) -> Result<()> { 40 | let handle = LocalIndex::new( 41 | self.base_path.clone(), 42 | name, 43 | schema, 44 | self.settings.writer_memory, 45 | self.settings.get_merge_policy(), 46 | )?; 47 | self.local_handles.insert(name.to_string(), handle); 48 | Ok(()) 49 | } 50 | 51 | async fn list_indexes(&self) -> Vec { 52 | let mut local_keys: Vec = self.local_handles.iter().map(|e| e.key().to_owned()).collect(); 53 | local_keys.sort(); 54 | local_keys.dedup(); 55 | local_keys 56 | } 57 | 58 | fn get_index(&self, name: &str) -> Result { 59 | self.local_handles.get(name).map(|r| r.value().to_owned()).ok_or_else(|| { 60 | let _ = &name; 61 | Error::UnknownIndex(name.into()) 62 | }) 63 | } 64 | 65 | fn exists(&self, index: &str) -> bool { 66 | self.get_collection().contains_key(index) 67 | } 68 | } 69 | 70 | impl IndexCatalog { 71 | pub fn new(settings: Settings) -> Result { 72 | let local_idxs = DashMap::new(); 73 | let path = PathBuf::from(&settings.path); 74 | let index_cat = IndexCatalog { 75 | settings, 76 | base_path: path, 77 | local_handles: local_idxs, 78 | }; 79 | 80 | Ok(index_cat) 81 | } 82 | 83 | pub fn load_index(path: &str) -> Result { 84 | let p = PathBuf::from(path); 85 | if p.exists() { 86 | Index::open_in_dir(&p).map_err(|_| Error::UnknownIndex(p.display().to_string())) 87 | } else { 88 | Err(Error::UnknownIndex(path.to_string())) 89 | } 90 | } 91 | 92 | pub fn get_mut_collection(&mut self) -> &mut DashMap { 93 | &mut self.local_handles 94 | } 95 | 96 | #[allow(dead_code)] 97 | pub(crate) fn 
add_test_index(&mut self, name: String, index: Index) { 98 | let local = LocalIndex::from_existing(name.clone(), index).unwrap(); 99 | self.local_handles.insert(name, local); 100 | } 101 | 102 | pub async fn refresh_catalog(&mut self) -> Result<()> { 103 | self.local_handles.clear(); 104 | 105 | for dir in fs::read_dir(self.base_path.clone())? { 106 | let entry = dir?.path(); 107 | if let Some(entry_str) = entry.to_str() { 108 | if entry.exists() { 109 | if !entry_str.ends_with(".node_id") { 110 | let pth: String = entry_str.rsplit(MAIN_SEPARATOR).take(1).collect(); 111 | log::debug!("Loading Path: {} - {}", pth, entry_str); 112 | 113 | let idx = IndexCatalog::load_index(entry_str)?; 114 | self.add_index(&pth, idx.schema()).await?; 115 | } 116 | } else { 117 | return Err(Error::UnknownIndex(format!("Path {}", entry.display()))); 118 | } 119 | } else { 120 | return Err(Error::UnknownIndex(format!("Path {} is not a valid unicode path", entry.display()))); 121 | } 122 | } 123 | Ok(()) 124 | } 125 | 126 | pub async fn clear(&self) { 127 | self.local_handles.clear(); 128 | } 129 | 130 | #[doc(hidden)] 131 | #[allow(dead_code)] 132 | pub fn from_index(name: String, index: Index) -> Result { 133 | let map = DashMap::new(); 134 | let settings = Settings { 135 | json_parsing_threads: 1, 136 | ..Default::default() 137 | }; 138 | let new_index = LocalIndex::from_existing(name.clone(), index) 139 | .unwrap_or_else(|e| panic!("Unable to open index: {} because it's locked: {:?}", name, e)); 140 | 141 | map.insert(name, new_index); 142 | 143 | Ok(IndexCatalog { 144 | settings, 145 | base_path: PathBuf::new(), 146 | local_handles: map, 147 | }) 148 | } 149 | } 150 | 151 | #[cfg(test)] 152 | pub fn create_test_catalog(name: &str) -> crate::SharedCatalog { 153 | let idx = crate::commit::tests::create_test_index(); 154 | let catalog = IndexCatalog::from_index(name.into(), idx).unwrap(); 155 | std::sync::Arc::new(catalog) 156 | } 157 | 
-------------------------------------------------------------------------------- /toshi-client/src/isahc_client.rs: -------------------------------------------------------------------------------- 1 | use isahc::prelude::*; 2 | use isahc::*; 3 | use serde::de::DeserializeOwned; 4 | use serde::Serialize; 5 | use tantivy::schema::Schema; 6 | 7 | use async_trait::async_trait; 8 | use toshi_types::*; 9 | 10 | use crate::{AsyncClient, Result, SyncClient}; 11 | use isahc::{HttpClient, Response}; 12 | use std::fmt::Display; 13 | 14 | #[derive(Debug)] 15 | pub struct ToshiClient { 16 | host: String, 17 | client: HttpClient, 18 | } 19 | 20 | impl ToshiClient { 21 | pub fn new(host: H) -> Self { 22 | let client = HttpClient::new().unwrap(); 23 | Self::with_client(host, client) 24 | } 25 | 26 | pub fn with_client(host: H, client: HttpClient) -> Self { 27 | Self { 28 | host: host.to_string(), 29 | client, 30 | } 31 | } 32 | 33 | #[inline] 34 | fn uri(&self, index: I) -> String 35 | where 36 | I: ToString, 37 | { 38 | format!("{}/{}", self.host, index.to_string()) 39 | } 40 | } 41 | 42 | #[async_trait] 43 | impl AsyncClient for ToshiClient { 44 | type Body = isahc::AsyncBody; 45 | 46 | async fn index(&self) -> Result> { 47 | self.client.get_async(&self.host).await.map_err(Into::into) 48 | } 49 | 50 | async fn list(&self) -> Result> { 51 | let uri = self.uri(""); 52 | self.client.get_async(uri).await.map_err(Into::into) 53 | } 54 | 55 | async fn index_summary(&self, index: I, include_sizes: bool) -> Result> 56 | where 57 | I: ToString + Send + Sync + Display, 58 | { 59 | let uri = self.uri(format!("{}/_summary?include_sizes={}", index, include_sizes)); 60 | self.client.get_async(uri).await.map_err(Into::into) 61 | } 62 | 63 | async fn create_index(&self, name: I, schema: Schema) -> Result> 64 | where 65 | I: ToString + Send + Sync + Display, 66 | { 67 | let uri = self.uri(format!("{}/_create", name)); 68 | let body = serde_json::to_vec(&SchemaBody(schema))?; 69 | 
self.client.put_async(uri, body).await.map_err(Into::into) 70 | } 71 | 72 | async fn add_document(&self, index: I, document: D, options: Option) -> Result> 73 | where 74 | I: ToString + Send + Sync + Display, 75 | D: Serialize + Send + Sync, 76 | { 77 | let uri = self.uri(index); 78 | let body = serde_json::to_vec(&AddDocument { options, document })?; 79 | self.client.put_async(uri, body).await.map_err(Into::into) 80 | } 81 | 82 | async fn search(&self, index: I, search: Search) -> Result> 83 | where 84 | I: ToString + Send + Sync + Display, 85 | D: DeserializeOwned + Clone + Send + Sync + Unpin, 86 | { 87 | let uri = self.uri(index); 88 | let body = serde_json::to_vec(&search)?; 89 | self.client.post_async(uri, body).await?.json().await.map_err(Into::into) 90 | } 91 | 92 | async fn all_docs(&self, index: I) -> Result> 93 | where 94 | I: ToString + Send + Sync + Display, 95 | D: DeserializeOwned + Clone + Send + Sync + Unpin, 96 | { 97 | let uri = self.uri(index); 98 | self.client.get_async(uri).await?.json().await.map_err(Into::into) 99 | } 100 | } 101 | 102 | impl SyncClient for ToshiClient { 103 | type Body = isahc::Body; 104 | 105 | fn sync_index(&self) -> Result> { 106 | self.client.get(self.host.clone()).map_err(Into::into) 107 | } 108 | 109 | fn sync_index_summary(&self, index: I, include_sizes: bool) -> Result> 110 | where 111 | I: ToString + Display, 112 | { 113 | let uri = self.uri(format!("{}/_summary?include_sizes={}", index, include_sizes)); 114 | self.client.get(uri).map_err(Into::into) 115 | } 116 | 117 | fn sync_create_index(&self, name: I, schema: Schema) -> Result> 118 | where 119 | I: ToString + Display, 120 | { 121 | let uri = self.uri(format!("{}/_create", name)); 122 | let body = serde_json::to_vec(&SchemaBody(schema))?; 123 | self.client.put(uri, body).map_err(Into::into) 124 | } 125 | 126 | fn sync_add_document(&self, index: I, document: D, options: Option) -> Result> 127 | where 128 | I: ToString + Display, 129 | D: Serialize, 130 | { 131 | 
let uri = self.uri(index); 132 | let body = serde_json::to_vec(&AddDocument { options, document })?; 133 | self.client.put(uri, body).map_err(Into::into) 134 | } 135 | 136 | fn sync_search(&self, index: I, search: Search) -> Result> 137 | where 138 | I: ToString + Display, 139 | D: DeserializeOwned + Clone, 140 | { 141 | let uri = self.uri(index); 142 | let body = serde_json::to_vec(&search)?; 143 | self.client.post(uri, body)?.json().map_err(Into::into) 144 | } 145 | 146 | fn sync_all_docs(&self, index: I) -> Result> 147 | where 148 | I: ToString + Display, 149 | D: DeserializeOwned + Clone, 150 | { 151 | let uri = self.uri(index); 152 | self.client.get(uri)?.json().map_err(Into::into) 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /toshi-proto/proto/eraftpb.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package eraftpb; 3 | 4 | enum EntryType { 5 | EntryNormal = 0; 6 | EntryConfChange = 1; 7 | EntryConfChangeV2 = 2; 8 | } 9 | 10 | // The entry is a type of change that needs to be applied. It contains two data fields. 11 | // While the fields are built into the model; their usage is determined by the entry_type. 12 | // 13 | // For normal entries, the data field should contain the data change that should be applied. 14 | // The context field can be used for any contextual data that might be relevant to the 15 | // application of the data. 16 | // 17 | // For configuration changes, the data will contain the ConfChange message and the 18 | // context will provide anything needed to assist the configuration change. The context 19 | // is for the user to set and use in this case. 20 | message Entry { 21 | EntryType entry_type = 1; 22 | uint64 term = 2; 23 | uint64 index = 3; 24 | bytes data = 4; 25 | bytes context = 6; 26 | 27 | // Deprecated! It is kept for backward compatibility. 28 | // TODO: remove it in the next major release.
29 | bool sync_log = 5; 30 | } 31 | 32 | message SnapshotMetadata { 33 | // The current `ConfState`. 34 | ConfState conf_state = 1; 35 | // The applied index. 36 | uint64 index = 2; 37 | // The term of the applied index. 38 | uint64 term = 3; 39 | } 40 | 41 | message Snapshot { 42 | bytes data = 1; 43 | SnapshotMetadata metadata = 2; 44 | } 45 | 46 | enum MessageType { 47 | MsgHup = 0; 48 | MsgBeat = 1; 49 | MsgPropose = 2; 50 | MsgAppend = 3; 51 | MsgAppendResponse = 4; 52 | MsgRequestVote = 5; 53 | MsgRequestVoteResponse = 6; 54 | MsgSnapshot = 7; 55 | MsgHeartbeat = 8; 56 | MsgHeartbeatResponse = 9; 57 | MsgUnreachable = 10; 58 | MsgSnapStatus = 11; 59 | MsgCheckQuorum = 12; 60 | MsgTransferLeader = 13; 61 | MsgTimeoutNow = 14; 62 | MsgReadIndex = 15; 63 | MsgReadIndexResp = 16; 64 | MsgRequestPreVote = 17; 65 | MsgRequestPreVoteResponse = 18; 66 | } 67 | 68 | message Message { 69 | MessageType msg_type = 1; 70 | uint64 to = 2; 71 | uint64 from = 3; 72 | uint64 term = 4; 73 | uint64 log_term = 5; 74 | uint64 index = 6; 75 | repeated Entry entries = 7; 76 | uint64 commit = 8; 77 | Snapshot snapshot = 9; 78 | uint64 request_snapshot = 13; 79 | bool reject = 10; 80 | uint64 reject_hint = 11; 81 | bytes context = 12; 82 | } 83 | 84 | message HardState { 85 | uint64 term = 1; 86 | uint64 vote = 2; 87 | uint64 commit = 3; 88 | } 89 | 90 | enum ConfChangeTransition { 91 | // Automatically use the simple protocol if possible, otherwise fall back 92 | // to ConfChangeType::Implicit. Most applications will want to use this. 93 | Auto = 0; 94 | // Use joint consensus unconditionally, and transition out of them 95 | // automatically (by proposing a zero configuration change). 96 | // 97 | // This option is suitable for applications that want to minimize the time 98 | // spent in the joint configuration and do not store the joint configuration 99 | // in the state machine (outside of InitialState). 
100 | Implicit = 1; 101 | // Use joint consensus and remain in the joint configuration until the 102 | // application proposes a no-op configuration change. This is suitable for 103 | // applications that want to explicitly control the transitions, for example 104 | // to use a custom payload (via the Context field). 105 | Explicit = 2; 106 | } 107 | 108 | message ConfState { 109 | repeated uint64 voters = 1; 110 | repeated uint64 learners = 2; 111 | 112 | // The voters in the outgoing config. If not empty the node is in joint consensus. 113 | repeated uint64 voters_outgoing = 3; 114 | // The nodes that will become learners when the outgoing config is removed. 115 | // These nodes are necessarily currently in nodes_joint (or they would have 116 | // been added to the incoming config right away). 117 | repeated uint64 learners_next = 4; 118 | // If set, the config is joint and Raft will automatically transition into 119 | // the final config (i.e. remove the outgoing config) when this is safe. 120 | bool auto_leave = 5; 121 | } 122 | 123 | enum ConfChangeType { 124 | AddNode = 0; 125 | RemoveNode = 1; 126 | AddLearnerNode = 2; 127 | } 128 | 129 | message ConfChange { 130 | ConfChangeType change_type = 2; 131 | uint64 node_id = 3; 132 | bytes context = 4; 133 | 134 | uint64 id = 1; 135 | } 136 | 137 | // ConfChangeSingle is an individual configuration change operation. Multiple 138 | // such operations can be carried out atomically via a ConfChangeV2. 139 | message ConfChangeSingle { 140 | ConfChangeType type = 1; 141 | uint64 node_id = 2; 142 | } 143 | 144 | // ConfChangeV2 messages initiate configuration changes. They support both the 145 | // simple "one at a time" membership change protocol and full Joint Consensus 146 | // allowing for arbitrary changes in membership. 147 | // 148 | // The supplied context is treated as an opaque payload and can be used to 149 | // attach an action on the state machine to the application of the config change 150 | // proposal. 
Note that contrary to Joint Consensus as outlined in the Raft 151 | // paper[1], configuration changes become active when they are *applied* to the 152 | // state machine (not when they are appended to the log). 153 | // 154 | // The simple protocol can be used whenever only a single change is made. 155 | // 156 | // Non-simple changes require the use of Joint Consensus, for which two 157 | // configuration changes are run. The first configuration change specifies the 158 | // desired changes and transitions the Raft group into the joint configuration, 159 | // in which quorum requires a majority of both the pre-changes and post-changes 160 | // configuration. Joint Consensus avoids entering fragile intermediate 161 | // configurations that could compromise survivability. For example, without the 162 | // use of Joint Consensus and running across three availability zones with a 163 | // replication factor of three, it is not possible to replace a voter without 164 | // entering an intermediate configuration that does not survive the outage of 165 | // one availability zone. 166 | // 167 | // The provided ConfChangeTransition specifies how (and whether) Joint Consensus 168 | // is used, and assigns the task of leaving the joint configuration either to 169 | // Raft or the application. Leaving the joint configuration is accomplished by 170 | // proposing a ConfChangeV2 with only and optionally the Context field 171 | // populated. 
172 | // 173 | // For details on Raft membership changes, see: 174 | // 175 | // [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf 176 | message ConfChangeV2 { 177 | ConfChangeTransition transition = 1; 178 | repeated ConfChangeSingle changes = 2; 179 | bytes context = 3; 180 | } -------------------------------------------------------------------------------- /toshi-raft/src/rpc_server.rs: -------------------------------------------------------------------------------- 1 | use std::net::SocketAddr; 2 | use std::sync::Arc; 3 | 4 | use slog::{info, Logger}; 5 | use tantivy::schema::Schema; 6 | use tonic::{transport::Server, Code, Request, Response, Status}; 7 | 8 | use toshi_proto::cluster_rpc::*; 9 | use toshi_types::{AddDocument, Catalog, DeleteDoc, DocsAffected, IndexHandle}; 10 | 11 | use crate::handle::RaftHandle; 12 | use crate::rpc_utils::*; 13 | use crate::BoxErr; 14 | 15 | pub struct RpcServer 16 | where 17 | C: Catalog>, 18 | H: IndexHandle + Send + Sync + 'static, 19 | { 20 | logger: Logger, 21 | catalog: Arc, 22 | } 23 | 24 | impl RpcServer 25 | where 26 | C: Catalog>, 27 | H: IndexHandle + Send + Sync + 'static, 28 | { 29 | pub async fn serve(addr: SocketAddr, catalog: Arc, logger: Logger) -> Result<(), BoxErr> { 30 | let service = server::IndexServiceServer::new(RpcServer { 31 | catalog, 32 | logger: logger.clone(), 33 | }); 34 | 35 | Ok(Server::builder().add_service(service).serve(addr).await?) 
36 | } 37 | } 38 | 39 | #[async_trait::async_trait] 40 | impl server::IndexService for RpcServer 41 | where 42 | C: Catalog>, 43 | H: IndexHandle + Send + Sync + 'static, 44 | { 45 | async fn ping(&self, _: Request) -> Result, Status> { 46 | Ok(Response::new(PingReply { status: "OK".into() })) 47 | } 48 | 49 | async fn place_index(&self, request: Request) -> Result, Status> { 50 | let PlaceRequest { index, schema } = request.into_inner(); 51 | let cat = Arc::clone(&self.catalog); 52 | if let Ok(schema) = serde_json::from_slice::(&schema) { 53 | if cat.add_index(&index, schema).await.is_ok() { 54 | Ok(Response::new(ok_result())) 55 | } else { 56 | error_response(Code::Internal, format!("Insert: {} failed", index)) 57 | } 58 | } else { 59 | error_response(Code::NotFound, "Invalid schema in request".into()) 60 | } 61 | } 62 | 63 | async fn list_indexes(&self, req: Request) -> Result, Status> { 64 | let cat = Arc::clone(&self.catalog); 65 | info!(self.logger, "Request From: {:?}", req); 66 | let indexes = cat.list_indexes().await; 67 | info!(self.logger, "Response: {:?}", indexes.join(", ")); 68 | let resp = Response::new(ListReply { indexes }); 69 | Ok(resp) 70 | } 71 | 72 | async fn place_document(&self, request: Request) -> Result, Status> { 73 | info!(self.logger, "REQ = {:?}", &request); 74 | let DocumentRequest { index, document } = request.into_inner(); 75 | let cat = Arc::clone(&self.catalog); 76 | if let Ok(idx) = cat.get_index(&index) { 77 | if let Ok(doc) = serde_json::from_slice::>(&document) { 78 | if idx.add_document(doc).await.is_ok() { 79 | Ok(Response::new(ok_result())) 80 | } else { 81 | error_response(Code::Internal, format!("Add Document Failed: {}", index)) 82 | } 83 | } else { 84 | error_response(Code::Internal, format!("Invalid Document request: {}", index)) 85 | } 86 | } else { 87 | error_response(Code::NotFound, "Could not find index".into()) 88 | } 89 | } 90 | 91 | async fn delete_document(&self, request: Request) -> Result, Status> { 92 | let 
DeleteRequest { index, terms } = request.into_inner(); 93 | let cat = Arc::clone(&self.catalog); 94 | if let Ok(idx) = cat.get_index(&index) { 95 | if let Ok(delete_docs) = serde_json::from_slice::(&terms) { 96 | let DocsAffected { docs_affected } = idx.delete_term(delete_docs).await.unwrap(); 97 | Ok(Response::new(DeleteReply { index, docs_affected })) 98 | } else { 99 | error_response(Code::Internal, format!("Invalid Document request: {}", index)) 100 | } 101 | } else { 102 | error_response(Code::NotFound, "Could not find index".into()) 103 | } 104 | } 105 | 106 | async fn search_index(&self, request: Request) -> Result, Status> { 107 | let inner = request.into_inner(); 108 | let cat = Arc::clone(&self.catalog); 109 | { 110 | if let Ok(index) = cat.get_index(&inner.index) { 111 | let query = match query_or_all(&inner.query) { 112 | Ok(v) => v, 113 | Err(e) => return error_response(Code::Internal, e.to_string()), 114 | }; 115 | info!(self.logger, "QUERY = {:?}", &query); 116 | 117 | match index.search_index(query).await { 118 | Ok(query_results) => { 119 | info!(self.logger, "Query Response = {:?}", query_results); 120 | let query_bytes: Vec = serde_json::to_vec(&query_results).unwrap(); 121 | let result = Some(ok_result()); 122 | Ok(Response::new(create_search_reply(result, query_bytes))) 123 | } 124 | Err(e) => error_response(Code::Internal, e.to_string()), 125 | } 126 | } else { 127 | error_response(Code::NotFound, format!("Index: {} not found", inner.index)) 128 | } 129 | } 130 | } 131 | 132 | async fn get_summary(&self, request: Request) -> Result, Status> { 133 | let SummaryRequest { index } = request.into_inner(); 134 | let cat = Arc::clone(&self.catalog); 135 | if let Ok(idx) = cat.get_index(&index) { 136 | if let Ok(metas) = idx.get_index().load_metas() { 137 | let meta_json = serde_json::to_vec(&metas).unwrap(); 138 | Ok(Response::new(SummaryReply { summary: meta_json })) 139 | } else { 140 | error_response(Code::DataLoss, format!("Could not load metas 
for: {}", index)) 141 | } 142 | } else { 143 | error_response(Code::NotFound, "Could not find index".into()) 144 | } 145 | } 146 | 147 | async fn raft_request(&self, request: Request) -> Result, Status> { 148 | let message: raft::eraftpb::Message = request.into_inner().message.unwrap(); 149 | 150 | slog::debug!(self.logger, "MSG = {:?}", message); 151 | 152 | let response = Response::new(RaftReply { code: 0 }); 153 | Ok(response) 154 | } 155 | 156 | async fn join(&self, request: Request) -> Result, Status> { 157 | let JoinRequest { host, id } = request.into_inner(); 158 | let conf = raft::prelude::ConfChange { 159 | id, 160 | change_type: 0, 161 | node_id: id, 162 | context: host.as_bytes().to_vec(), 163 | }; 164 | slog::debug!(self.logger, "CONF = {:?}", conf); 165 | 166 | let response = Response::new(ResultReply::default()); 167 | Ok(response) 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /toshi-server/src/handlers/index.rs: -------------------------------------------------------------------------------- 1 | use hyper::body::to_bytes; 2 | use hyper::{Body, Response, StatusCode}; 3 | 4 | use toshi_types::{Catalog, IndexHandle}; 5 | use toshi_types::{DeleteDoc, Error, SchemaBody}; 6 | 7 | use crate::handlers::ResponseFuture; 8 | use crate::utils::{empty_with_code, error_response, with_body}; 9 | use crate::AddDocument; 10 | use std::sync::Arc; 11 | 12 | pub async fn delete_term(catalog: Arc, body: Body, index: &str) -> ResponseFuture { 13 | if !catalog.exists(index) { 14 | return Ok(error_response(StatusCode::BAD_REQUEST, Error::UnknownIndex(index.to_string()))); 15 | } 16 | let agg_body = to_bytes(body).await?; 17 | match serde_json::from_slice::(&agg_body) { 18 | Ok(dd) => match catalog.get_index(index) { 19 | Ok(c) => c 20 | .delete_term(dd) 21 | .await 22 | .map(with_body) 23 | .or_else(|e| Ok(error_response(StatusCode::BAD_REQUEST, e))), 24 | Err(e) => Ok(error_response(StatusCode::BAD_REQUEST, e)), 25 | 
}, 26 | Err(e) => Ok(error_response(StatusCode::BAD_REQUEST, e.into())), 27 | } 28 | } 29 | 30 | pub async fn create_index(catalog: Arc, body: Body, index: &str) -> ResponseFuture { 31 | if catalog.exists(index) { 32 | return Ok(error_response(StatusCode::BAD_REQUEST, Error::AlreadyExists(index.to_string()))); 33 | } 34 | let req = to_bytes(body).await?; 35 | match serde_json::from_slice::(&req) { 36 | Ok(schema_body) => match catalog.add_index(index, schema_body.0).await { 37 | Ok(_) => Ok(empty_with_code(StatusCode::CREATED)), 38 | Err(e) => Ok(Response::from(e)), 39 | }, 40 | Err(e) => Ok(error_response(StatusCode::BAD_REQUEST, e.into())), 41 | } 42 | } 43 | 44 | pub async fn add_document(catalog: Arc, body: Body, index: &str) -> ResponseFuture { 45 | if !catalog.exists(index) { 46 | return Ok(error_response(StatusCode::BAD_REQUEST, Error::UnknownIndex(index.to_string()))); 47 | } 48 | let full_body = to_bytes(body).await?; 49 | match serde_json::from_slice::(&full_body) { 50 | Ok(v) => match catalog.get_index(index) { 51 | Ok(c) => c 52 | .add_document(v) 53 | .await 54 | .map(|_| empty_with_code(StatusCode::CREATED)) 55 | .or_else(|e| Ok(error_response(StatusCode::BAD_REQUEST, e))), 56 | Err(e) => Ok(error_response(StatusCode::BAD_REQUEST, e)), 57 | }, 58 | Err(e) => Ok(error_response(StatusCode::BAD_REQUEST, e.into())), 59 | } 60 | } 61 | 62 | #[cfg(test)] 63 | mod tests { 64 | #![allow(unused_must_use)] 65 | use std::collections::HashMap; 66 | use std::sync::Arc; 67 | 68 | use pretty_assertions::assert_eq; 69 | 70 | use toshi_types::IndexOptions; 71 | 72 | use crate::handlers::all_docs; 73 | use crate::index::create_test_catalog; 74 | 75 | use super::*; 76 | use crate::commit::tests::wait_json; 77 | 78 | fn test_index() -> String { 79 | String::from("test_index") 80 | } 81 | 82 | #[tokio::test] 83 | async fn test_create_index() -> Result<(), Box> { 84 | let shared_cat = create_test_catalog("test_index"); 85 | let schema = r#"[ 86 | { "name": "test_text", 
"type": "text", "options": { "indexing": { "record": "position", "tokenizer": "default" }, "stored": true } }, 87 | { "name": "test_unindex", "type": "text", "options": { "indexing": { "record": "position", "tokenizer": "default" }, "stored": true } }, 88 | { "name": "test_i64", "type": "i64", "options": { "indexed": true, "stored": true } }, 89 | { "name": "test_u64", "type": "u64", "options": { "indexed": true, "stored": true } } 90 | ]"#; 91 | 92 | create_index(Arc::clone(&shared_cat), Body::from(schema), "new_index").await?; 93 | let resp = all_docs(Arc::clone(&shared_cat), "new_index").await?; 94 | let b = wait_json::(resp).await; 95 | assert_eq!(b.hits, 0); 96 | remove_dir_all::remove_dir_all("new_index"); // Try, but don't fail on this. 97 | Ok(()) 98 | } 99 | 100 | #[cfg(feature = "extra_tokenizers")] 101 | #[tokio::test] 102 | async fn test_create_index_extra_tokenizers() -> Result<(), Box> { 103 | let shared_cat = create_test_catalog("test_index"); 104 | let schema = r#"[ 105 | { "name": "test_text", "type": "text", "options": { "indexing": { "record": "position", "tokenizer": "CANG_JIE" }, "stored": true } }, 106 | { "name": "test_unindex", "type": "text", "options": { "indexing": { "record": "position", "tokenizer": "CANG_JIE" }, "stored": true } }, 107 | { "name": "test_i64", "type": "i64", "options": { "indexed": true, "stored": true } }, 108 | { "name": "test_u64", "type": "u64", "options": { "indexed": true, "stored": true } } 109 | ]"#; 110 | 111 | create_index(Arc::clone(&shared_cat), Body::from(schema), "new_index_extra_tok").await?; 112 | 113 | let q = r#" {"options": {"commit": true }, "document": {"test_text": "南京长江大桥", "test_u64": 10, "test_i64": -10} }"#; 114 | 115 | add_document(Arc::clone(&shared_cat), Body::from(q), "new_index_extra_tok").await?; 116 | tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; 117 | let resp = all_docs(Arc::clone(&shared_cat), "new_index_extra_tok").await?; 118 | let b = wait_json::(resp).await; 119 | 
assert_eq!(b.hits, 1); 120 | 121 | remove_dir_all::remove_dir_all("new_index_extra_tok"); // Try, but don't fail on this. 122 | remove_dir_all::remove_dir_all("test_index"); 123 | Ok(()) 124 | } 125 | 126 | #[tokio::test] 127 | async fn test_doc_create() { 128 | let shared_cat = create_test_catalog("test_index"); 129 | let q = r#" {"options": {"commit": true }, "document": {"test_text": "Babbaboo!", "test_u64": 10, "test_i64": -10} }"#; 130 | let req = add_document(Arc::clone(&shared_cat), Body::from(q), &test_index()).await; 131 | assert!(req.is_ok()); 132 | } 133 | 134 | #[tokio::test] 135 | async fn test_doc_delete() { 136 | let shared_cat = create_test_catalog("test_index"); 137 | 138 | let mut terms = HashMap::new(); 139 | terms.insert(test_index(), "document".to_string()); 140 | let delete = DeleteDoc { 141 | options: Some(IndexOptions { commit: true }), 142 | terms, 143 | }; 144 | let body_bytes = serde_json::to_vec(&delete).unwrap(); 145 | let del = delete_term(Arc::clone(&shared_cat), Body::from(body_bytes), &test_index()).await; 146 | assert!(del.is_ok()); 147 | } 148 | 149 | #[tokio::test] 150 | async fn test_bad_json() { 151 | let shared_cat = create_test_catalog("test_index"); 152 | 153 | let bad_json: serde_json::Value = serde_json::Value::String("".into()); 154 | let add_doc = AddDocument { 155 | document: bad_json, 156 | options: None, 157 | }; 158 | let body_bytes = serde_json::to_vec(&add_doc).unwrap(); 159 | let req = add_document(Arc::clone(&shared_cat), Body::from(body_bytes), &test_index()) 160 | .await 161 | .unwrap() 162 | .into_body(); 163 | let buf = hyper::body::to_bytes(req).await.unwrap(); 164 | let str_buf = std::str::from_utf8(&buf).unwrap(); 165 | assert_eq!( 166 | str_buf, 167 | "{\"message\":\"Error in Index: \'The provided string is not valid JSON\'\"}" 168 | ) 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /toshi-types/src/query/range.rs: 
-------------------------------------------------------------------------------- 1 | use std::ops::Bound; 2 | 3 | use serde::de::DeserializeOwned; 4 | use serde::{Deserialize, Serialize}; 5 | use serde_json::{to_value, Value}; 6 | use tantivy::query::{Query as TantivyQuery, RangeQuery as TantivyRangeQuery}; 7 | use tantivy::schema::{FieldType, Schema}; 8 | 9 | use crate::query::{CreateQuery, KeyValue, Query}; 10 | use crate::{error::Error, Result}; 11 | 12 | /// The possible values a range can take on 13 | /// gte = greater than or equal 14 | /// lte = less than or equal 15 | /// lt = less than 16 | /// gt = greater than 17 | #[derive(Serialize, Deserialize, Debug, Clone)] 18 | #[serde(untagged)] 19 | pub enum Ranges { 20 | /// Possible range values, all are optional, but on parsing if enough do not exist the query 21 | /// generation will fail 22 | ValueRange { 23 | /// Greater than or equal 24 | gte: Option, 25 | /// Less than or equal 26 | lte: Option, 27 | /// Less than 28 | lt: Option, 29 | /// Greater than 30 | gt: Option, 31 | /// Currently does nothing and can be safely omitted 32 | boost: Option, 33 | }, 34 | } 35 | 36 | /// A query for a range of values, for example 1 through 10 37 | #[derive(Serialize, Deserialize, Debug, Clone)] 38 | pub struct RangeQuery { 39 | pub(crate) range: KeyValue, 40 | } 41 | 42 | impl CreateQuery for RangeQuery { 43 | fn create_query(self, schema: &Schema) -> Result> { 44 | let KeyValue { field, value, .. 
} = self.range; 45 | create_range_query(schema, &field, value) 46 | } 47 | } 48 | 49 | impl RangeQuery { 50 | /// Constructor for create a key value for a user 51 | pub fn new(field: String, ranges: Ranges) -> Self { 52 | Self { 53 | range: KeyValue::new(field, ranges), 54 | } 55 | } 56 | 57 | /// Creating a builder used to create a ranged query 58 | pub fn builder() -> RangeQueryBuilder 59 | where 60 | V: Serialize + Default, 61 | { 62 | RangeQueryBuilder::::default() 63 | } 64 | } 65 | 66 | #[derive(Debug, Default)] 67 | pub struct RangeQueryBuilder 68 | where 69 | V: Serialize + Default, 70 | { 71 | field: String, 72 | gte: V, 73 | lte: V, 74 | lt: V, 75 | gt: V, 76 | boost: f32, 77 | } 78 | 79 | impl RangeQueryBuilder 80 | where 81 | V: Serialize + Default, 82 | { 83 | pub fn new() -> Self { 84 | Self::default() 85 | } 86 | 87 | pub fn for_field(mut self, field: F) -> Self 88 | where 89 | F: ToString, 90 | { 91 | self.field = field.to_string(); 92 | self 93 | } 94 | 95 | pub fn gte(mut self, gte: V) -> Self { 96 | self.gte = gte; 97 | self 98 | } 99 | 100 | pub fn lte(mut self, lte: V) -> Self { 101 | self.lte = lte; 102 | self 103 | } 104 | pub fn lt(mut self, lt: V) -> Self { 105 | self.lt = lt; 106 | self 107 | } 108 | 109 | pub fn gt(mut self, gt: V) -> Self { 110 | self.gt = gt; 111 | self 112 | } 113 | 114 | pub fn with_boost(mut self, boost: f32) -> Self { 115 | self.boost = boost; 116 | self 117 | } 118 | 119 | pub fn build(self) -> Query { 120 | let range_q = Ranges::ValueRange { 121 | gte: to_value(self.gte).ok(), 122 | lte: to_value(self.lte).ok(), 123 | lt: to_value(self.lt).ok(), 124 | gt: to_value(self.gt).ok(), 125 | boost: Some(self.boost), 126 | }; 127 | Query::Range(RangeQuery::new(self.field, range_q)) 128 | } 129 | } 130 | 131 | #[inline] 132 | fn include_exclude(r: Option, r2: Option) -> Result> 133 | where 134 | V: DeserializeOwned, 135 | { 136 | if let Some(b) = r { 137 | let value = serde_json::from_value(b).map_err(Error::from)?; 138 | 
Ok(Bound::Excluded(value)) 139 | } else if let Some(b) = r2 { 140 | let value = serde_json::from_value(b).map_err(Error::from)?; 141 | Ok(Bound::Included(value)) 142 | } else { 143 | Ok(Bound::Unbounded) 144 | } 145 | } 146 | 147 | #[inline] 148 | fn create_ranges(gte: Option, lte: Option, lt: Option, gt: Option) -> Result<(Bound, Bound)> 149 | where 150 | V: DeserializeOwned, 151 | { 152 | Ok((include_exclude(lt, lte)?, include_exclude(gt, gte)?)) 153 | } 154 | 155 | fn create_range_query(schema: &Schema, field: &str, r: Ranges) -> Result> { 156 | match r { 157 | Ranges::ValueRange { gte, lte, lt, gt, .. } => { 158 | let field = schema 159 | .get_field(field) 160 | .ok_or_else(|| Error::QueryError(format!("Field {} does not exist", field)))?; 161 | let field_type = schema.get_field_entry(field).field_type(); 162 | match field_type { 163 | &FieldType::I64(_) => { 164 | let (upper, lower) = create_ranges::(gte, lte, lt, gt)?; 165 | Ok(Box::new(TantivyRangeQuery::new_i64_bounds(field, lower, upper))) 166 | } 167 | &FieldType::U64(_) => { 168 | let (upper, lower) = create_ranges::(gte, lte, lt, gt)?; 169 | Ok(Box::new(TantivyRangeQuery::new_u64_bounds(field, lower, upper))) 170 | } 171 | ref ft => Err(Error::QueryError(format!("Invalid field type: {:?} for range query", ft))), 172 | } 173 | } 174 | } 175 | } 176 | 177 | #[cfg(test)] 178 | mod tests { 179 | use tantivy::schema::*; 180 | 181 | use super::*; 182 | 183 | #[test] 184 | fn test_deserialize_missing_ranges() { 185 | let body = r#"{ "range" : { "test_i64" : { "gte" : 2012 } } }"#; 186 | let req = serde_json::from_str::(body); 187 | assert!(!req.is_err()); 188 | } 189 | 190 | #[test] 191 | fn test_query_creation_bad_type() { 192 | let body = r#"{ "range" : { "test_i64" : { "gte" : 3.14 } } }"#; 193 | let mut schema = SchemaBuilder::new(); 194 | schema.add_i64_field("test_i64", FAST); 195 | let built = schema.build(); 196 | let req = serde_json::from_str::(body).unwrap().create_query(&built); 197 | 198 | 
assert!(req.is_err()); 199 | assert_eq!( 200 | req.unwrap_err().to_string(), 201 | "Error Parsing Json: 'invalid type: floating point `3.14`, expected i64'" 202 | ); 203 | } 204 | 205 | #[test] 206 | fn test_query_creation_bad_range() { 207 | let body = r#"{ "range" : { "test_u64" : { "gte" : -1 } } }"#; 208 | let mut schema = SchemaBuilder::new(); 209 | schema.add_u64_field("test_u64", FAST); 210 | let built = schema.build(); 211 | let req = serde_json::from_str::(body).unwrap().create_query(&built); 212 | 213 | assert!(req.is_err()); 214 | assert_eq!( 215 | req.unwrap_err().to_string(), 216 | "Error Parsing Json: 'invalid value: integer `-1`, expected u64'" 217 | ); 218 | } 219 | 220 | #[test] 221 | fn test_query_impossible_range() { 222 | let body = r#"{ "range" : { "test_u64" : { "gte" : 10, "lte" : 1 } } }"#; 223 | let mut schema = SchemaBuilder::new(); 224 | schema.add_u64_field("test_u64", FAST); 225 | let built = schema.build(); 226 | let req = serde_json::from_str::(body).unwrap().create_query(&built); 227 | 228 | assert!(!req.is_err()); 229 | } 230 | 231 | #[test] 232 | fn test_range_builder() { 233 | let builder = RangeQuery::builder().gte(5).lte(10).for_field("test"); 234 | let query: Query = builder.build(); 235 | if let Query::Range(rq) = query { 236 | assert_eq!(rq.range.field, "test"); 237 | } 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /toshi-server/src/handlers/bulk.rs: -------------------------------------------------------------------------------- 1 | use bytes::BytesMut; 2 | use std::sync::atomic::{AtomicBool, Ordering}; 3 | use std::sync::Arc; 4 | use std::time::{Duration, Instant}; 5 | 6 | use flume::{unbounded, Receiver, Sender}; 7 | use futures::StreamExt; 8 | use hyper::Body; 9 | use hyper::StatusCode; 10 | 11 | use log::*; 12 | use tantivy::schema::Schema; 13 | use tantivy::{Document, IndexWriter}; 14 | use tokio::sync::Mutex; 15 | use tokio::time::timeout; 16 | use 
tokio_util::codec::{Decoder, LinesCodec, LinesCodecError}; 17 | 18 | use toshi_types::{Catalog, Error, IndexHandle}; 19 | 20 | use crate::handlers::ResponseFuture; 21 | use crate::utils::{empty_with_code, error_response, not_found}; 22 | 23 | const DEFAULT_TIMEOUT: Duration = Duration::from_millis(100); 24 | 25 | async fn index_documents(iw: Arc>, dr: Receiver, wr: Arc) -> Result<(), Error> { 26 | let start = Instant::now(); 27 | while let Ok(Ok(doc)) = timeout(DEFAULT_TIMEOUT, dr.recv_async()).await { 28 | let w = iw.lock().await; 29 | w.add_document(doc)?; 30 | } 31 | 32 | info!("Piping Documents took: {:?}", start.elapsed()); 33 | wr.store(false, Ordering::SeqCst); 34 | Ok(()) 35 | } 36 | 37 | async fn parsing_documents(s: Schema, ds: Sender, lr: Receiver, ec: Sender) -> Result<(), ()> { 38 | while let Ok(Ok(line)) = timeout(DEFAULT_TIMEOUT, lr.recv_async()).await { 39 | if !line.is_empty() { 40 | match s.parse_document(&line) { 41 | Ok(doc) => { 42 | info!("Piped document... {}", doc.len()); 43 | ds.send_async(doc).await.expect("Parsing Thread failed."); 44 | } 45 | Err(e) => { 46 | let err = anyhow::Error::msg("Error parsing document").context(line).context(e); 47 | ec.send_async(Error::TantivyError(err)).await.expect("Parsing thread loop failed."); 48 | break; 49 | } 50 | }; 51 | } 52 | } 53 | Ok(()) 54 | } 55 | 56 | pub async fn bulk_insert( 57 | catalog: Arc, 58 | watcher: Arc, 59 | mut body: Body, 60 | index: &str, 61 | num_threads: usize, 62 | max_line_length: usize, 63 | ) -> ResponseFuture { 64 | if !catalog.exists(index) { 65 | return not_found().await; 66 | } 67 | watcher.store(true, Ordering::SeqCst); 68 | let index_handle = catalog.get_index(index).unwrap(); 69 | let writer = index_handle.get_writer(); 70 | let i = index_handle.get_index(); 71 | let schema = i.schema(); 72 | 73 | let (line_sender, line_recv) = unbounded::(); 74 | let (doc_sender, doc_recv) = unbounded::(); 75 | let (err_snd, err_rcv) = unbounded(); 76 | 77 | info!("Spawning {} 
parsing threads...", num_threads); 78 | let mut parsing_handles = Vec::with_capacity(num_threads); 79 | for _ in 0..num_threads { 80 | let schema = schema.clone(); 81 | let doc_sender = doc_sender.clone(); 82 | let line_recv = line_recv.clone(); 83 | let err_snd = err_snd.clone(); 84 | parsing_handles.push(tokio::spawn(parsing_documents(schema, doc_sender, line_recv, err_snd))); 85 | } 86 | info!("Spawned threads finished..."); 87 | let mut buf = BytesMut::new(); 88 | let mut decoder = if max_line_length > 0 { 89 | LinesCodec::new_with_max_length(max_line_length) 90 | } else { 91 | LinesCodec::new() 92 | }; 93 | 94 | while let Some(Ok(line)) = body.next().await { 95 | buf.extend_from_slice(&line); 96 | 97 | loop { 98 | match decoder.decode_eof(&mut buf) { 99 | Ok(Some(l)) if !l.is_empty() => { 100 | let l = l.trim(); 101 | line_sender.send_async(l.into()).await.unwrap(); 102 | } 103 | Ok(None) | Ok(Some(_)) => break, 104 | Err(LinesCodecError::MaxLineLengthExceeded) => { 105 | let err_txt = format!( 106 | "Line exceeded max length of {}, you can increase this with the max_line_length config option", 107 | max_line_length 108 | ); 109 | let err_msg = anyhow::Error::msg(err_txt); 110 | return Ok(error_response(StatusCode::BAD_REQUEST, Error::TantivyError(err_msg))); 111 | } 112 | Err(err) => { 113 | let err_msg = anyhow::Error::msg("Error with codec.").context(err); 114 | return Ok(error_response(StatusCode::BAD_REQUEST, Error::TantivyError(err_msg))); 115 | } 116 | } 117 | } 118 | } 119 | 120 | futures::future::join_all(parsing_handles).await; 121 | if !err_rcv.is_empty() { 122 | let mut iw = writer.lock().await; 123 | iw.rollback() 124 | .unwrap_or_else(|e| panic!("Error rolling back index: {}, this should be reported as a bug. 
{}", index, e)); 125 | match err_rcv.recv_async().await { 126 | Ok(err) => return Ok(error_response(StatusCode::BAD_REQUEST, err)), 127 | Err(err) => panic!("Panic receiving error: {:?}", err), 128 | } 129 | } 130 | 131 | match index_documents(writer, doc_recv, Arc::clone(&watcher)).await { 132 | Ok(_) => Ok(empty_with_code(StatusCode::CREATED)), 133 | Err(err) => Ok(error_response(StatusCode::BAD_REQUEST, err)), 134 | } 135 | } 136 | 137 | #[cfg(test)] 138 | mod tests { 139 | use std::time::Duration; 140 | 141 | use crate::commit::tests::read_body; 142 | use crate::handlers::all_docs; 143 | use crate::handlers::summary::flush; 144 | use crate::index::create_test_catalog; 145 | use crate::SearchResults; 146 | 147 | use super::*; 148 | 149 | #[tokio::test] 150 | async fn test_bulk_index() -> Result<(), Box> { 151 | let server = create_test_catalog("test_index_bulk"); 152 | let lock = Arc::new(AtomicBool::new(false)); 153 | 154 | let body = r#"{"test_text": "asdf1234", "test_i64": 123, "test_u64": 321, "test_unindex": "asdf", "test_facet": "/cat/cat4"} 155 | {"test_text": "asdf5678", "test_i64": 456, "test_u64": 678, "test_unindex": "asdf", "test_facet": "/cat/cat4"} 156 | {"test_text": "asdf9012", "test_i64": -12, "test_u64": 901, "test_unindex": "asdf", "test_facet": "/cat/cat4"}"#; 157 | 158 | let index_docs = bulk_insert(Arc::clone(&server), lock, Body::from(body), "test_index_bulk", 2, 2048).await?; 159 | assert_eq!(index_docs.status(), StatusCode::CREATED); 160 | 161 | let f = flush(Arc::clone(&server), "test_index_bulk").await?; 162 | 163 | assert_eq!(f.status(), StatusCode::OK); 164 | 165 | std::thread::sleep(Duration::from_secs(1)); 166 | let check_docs = all_docs(Arc::clone(&server), "test_index_bulk").await?; 167 | let body: String = read_body(check_docs).await?; 168 | let docs: SearchResults = serde_json::from_slice(body.as_bytes())?; 169 | 170 | assert_eq!(docs.hits, 8); 171 | Ok(()) 172 | } 173 | 174 | #[tokio::test] 175 | async fn test_errors() -> 
Result<(), Box> { 176 | let server = create_test_catalog("test_index"); 177 | let lock = Arc::new(AtomicBool::new(false)); 178 | 179 | let body: &str = r#"{"test_text": "asdf1234", "test_i64": 123, "test_u64": 321, "test_unindex": "asdf", "test_facet": "/cat/cat4"} 180 | {"test_text": "asdf5678", "test_i64": 456, "test_u64": 678, "test_unindex": "asdf", "test_facet": "/cat/cat4"} 181 | {"test_text": "asdf9012", "test_i64": -12, "test_u64": -9, "test_unindex": "asdf", "test_facet": "/cat/cat4"}"#; 182 | 183 | let index_docs = bulk_insert(Arc::clone(&server), lock, Body::from(body), "test_index", 2, 2048).await?; 184 | assert_eq!(index_docs.status(), StatusCode::BAD_REQUEST); 185 | 186 | let body = read_body(index_docs).await?; 187 | println!("{}", body); 188 | Ok(()) 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Toshi 2 | ##### A Full-Text Search Engine in Rust 3 | 4 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 5 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/4751c082efd74f849b5274d74c284c87)](https://app.codacy.com/app/shcarman/Toshi?utm_source=github.com&utm_medium=referral&utm_content=toshi-search/Toshi&utm_campaign=Badge_Grade_Settings) 6 | [![Actions Status](https://github.com/toshi-search/toshi/workflows/toshi-push/badge.svg)](https://github.com/toshi-search/toshi/actions) 7 | [![codecov](https://codecov.io/gh/toshi-search/Toshi/branch/master/graph/badge.svg)](https://codecov.io/gh/toshi-search/Toshi) 8 | [![Join the chat at https://gitter.im/toshi-search/Toshi](https://badges.gitter.im/toshi-search/Toshi.svg)](https://gitter.im/toshi-search/Toshi?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 9 | [![dependency 
status](https://deps.rs/repo/github/toshi-search/toshi/status.svg)](https://deps.rs/repo/github/toshi-search/toshi) 10 | 11 | > *Please note that this is far from production ready, also Toshi is still under active development, I'm just slow.* 12 | 13 | #### Description 14 | Toshi is meant to be a full-text search engine similar to Elasticsearch. Toshi strives 15 | to be to Elasticsearch what [Tantivy](https://github.com/tantivy-search/tantivy) is to Lucene. 16 | 17 | #### Motivations 18 | Toshi will always target stable Rust and will try our best to never make any use of unsafe Rust. While underlying libraries may make some 19 | use of unsafe, Toshi will make a concerted effort to vet these libraries in an effort to be completely free of unsafe Rust usage. The 20 | reason I chose this was because I felt that for this to actually become an attractive option for people to consider it would have to have 21 | be safe, stable and consistent. This was why stable Rust was chosen because of the guarantees and safety it provides. I did not want to go down the rabbit hole of using nightly features to then have issues with their stability later on. Since Toshi is not 22 | meant to be a library, I'm perfectly fine with having this requirement because people who would want to use this more than likely will 23 | take it off the shelf and not modify it. My motivation was to cater to that use case when building Toshi. 24 | 25 | #### Build Requirements 26 | At this current time Toshi should build and work fine on Windows, Mac OS X, and Linux. From dependency requirements you are going to need 1.39.0 and Cargo installed in order to build. You can get rust easily from 27 | [rustup](https://rustup.rs). 
28 | 29 | #### Configuration 30 | 31 | There is a default configuration file in config/config.toml: 32 | 33 | ```toml 34 | host = "127.0.0.1" 35 | port = 8080 36 | path = "data2/" 37 | writer_memory = 200000000 38 | log_level = "info" 39 | json_parsing_threads = 4 40 | bulk_buffer_size = 10000 41 | auto_commit_duration = 10 42 | experimental = false 43 | 44 | [experimental_features] 45 | master = true 46 | nodes = [ 47 | "127.0.0.1:8081" 48 | ] 49 | 50 | [merge_policy] 51 | kind = "log" 52 | min_merge_size = 8 53 | min_layer_size = 10_000 54 | level_log_size = 0.75 55 | ``` 56 | 57 | ##### Host 58 | `host = "localhost"` 59 | 60 | The hostname Toshi will bind to upon start. 61 | 62 | ##### Port 63 | `port = 8080` 64 | 65 | The port Toshi will bind to upon start. 66 | 67 | ##### Path 68 | `path = "data/"` 69 | 70 | The data path where Toshi will store its data and indices. 71 | 72 | ##### Writer Memory 73 | `writer_memory = 200000000` 74 | 75 | The amount of memory (in bytes) Toshi should allocate to commits for new documents. 76 | 77 | ##### Log Level 78 | `log_level = "info"` 79 | 80 | The detail level to use for Toshi's logging. 81 | 82 | ##### Json Parsing 83 | `json_parsing_threads = 4` 84 | 85 | When Toshi does a bulk ingest of documents it will spin up a number of threads to parse the document's json as it's 86 | received. This controls the number of threads spawned to handle this job. 87 | 88 | ##### Bulk Buffer 89 | `bulk_buffer_size = 10000` 90 | 91 | This will control the buffer size for parsing documents into an index. It will control the amount of memory a bulk ingest will 92 | take up by blocking when the message buffer is filled. If you want to go totally off the rails you can set this to 0 in order to make the buffer unbounded. 93 | 94 | ##### Auto Commit Duration 95 | `auto_commit_duration = 10` 96 | 97 | This controls how often an index will automatically commit documents if there are docs to be committed. 
Set this to 0 to disable this feature, but you will have to do commits yourself when you submit documents. 98 | 99 | ##### Merge Policy 100 | ```toml 101 | [merge_policy] 102 | kind = "log" 103 | ``` 104 | 105 | Tantivy will merge index segments according to the configuration outlined here. There are 2 options for this. "log" which is the default 106 | segment merge behavior. Log has 3 additional values to it as well. Any of these 3 values can be omitted to use Tantivy's default value. 107 | The default values are listed below. 108 | 109 | ```toml 110 | min_merge_size = 8 111 | min_layer_size = 10_000 112 | level_log_size = 0.75 113 | ``` 114 | 115 | In addition there is the "nomerge" option, in which Tantivy will do no merging of segments. 116 | 117 | ##### Experimental Settings 118 | ```toml 119 | experimental = false 120 | 121 | [experimental_features] 122 | master = true 123 | nodes = [ 124 | "127.0.0.1:8081" 125 | ] 126 | ``` 127 | 128 | In general these settings aren't ready for usage yet as they are very unstable or flat out broken. Right now the distribution of Toshi 129 | is behind this flag, so if experimental is set to false then all these settings are ignored. 130 | 131 | 132 | #### Building and Running 133 | Toshi can be built using `cargo build --release`. Once Toshi is built you can run `./target/release/toshi` from the top level directory to start Toshi according to the configuration in config/config.toml 134 | 135 | You should get a startup message like this. 
136 | 137 | ```bash 138 | ______ __ _ ____ __ 139 | /_ __/__ ___ / / (_) / __/__ ___ _________/ / 140 | / / / _ \(_- Indexes: [] 145 | ``` 146 | 147 | You can verify Toshi is running with: 148 | 149 | ```bash 150 | curl -X GET http://localhost:8080/ 151 | ``` 152 | 153 | which should return: 154 | 155 | ```json 156 | { 157 | "name": "Toshi Search", 158 | "version": "0.1.1" 159 | } 160 | ``` 161 | Once toshi is running it's best to check the `requests.http` file in the root of this project to see some more examples of usage. 162 | 163 | #### Example Queries 164 | ##### Term Query 165 | ```json 166 | { "query": {"term": {"test_text": "document" } }, "limit": 10 } 167 | ``` 168 | ##### Fuzzy Term Query 169 | ```json 170 | { "query": {"fuzzy": {"test_text": {"value": "document", "distance": 0, "transposition": false } } }, "limit": 10 } 171 | ``` 172 | ##### Phrase Query 173 | ```json 174 | { "query": {"phrase": {"test_text": {"terms": ["test","document"] } } }, "limit": 10 } 175 | ``` 176 | ##### Range Query 177 | ```json 178 | { "query": {"range": { "test_i64": { "gte": 2012, "lte": 2015 } } }, "limit": 10 } 179 | ``` 180 | ##### Regex Query 181 | ```json 182 | { "query": {"regex": { "test_text": "d[ou]{1}c[k]?ument" } }, "limit": 10 } 183 | ``` 184 | ##### Boolean Query 185 | ```json 186 | { "query": {"bool": {"must": [ { "term": { "test_text": "document" } } ], "must_not": [ {"range": {"test_i64": { "gt": 2017 } } } ] } }, "limit": 10 } 187 | ``` 188 | 189 | ##### Usage 190 | To try any of the above queries you can use the above example 191 | ```bash 192 | curl -X POST http://localhost:8080/test_index -H 'Content-Type: application/json' -d '{ "query": {"term": {"test_text": "document" } }, "limit": 10 }' 193 | ``` 194 | Also, to note, limit is optional, 10 is the default value. It's only included here for completeness. 195 | 196 | #### Running Tests 197 | 198 | `cargo test` 199 | 200 | #### What is a Toshi? 201 | 202 | Toshi is a three year old Shiba Inu. 
He is a very good boy and is the official mascot of this project. Toshi personally reviews all code before it is committed to this repository and is dedicated to only accepting the highest quality contributions from his human. He will, though, accept treats for easier code reviews. 203 | -------------------------------------------------------------------------------- /toshi-raft/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use std::time::{Duration, Instant}; 3 | 4 | use dashmap::DashMap; 5 | use http::Uri; 6 | use prost::Message; 7 | use raft::eraftpb::Message as RaftMessage; 8 | use raft::prelude::*; 9 | use raft::StateRole; 10 | use tokio::sync::mpsc::Receiver; 11 | use tokio::sync::RwLock; 12 | 13 | use toshi_proto::cluster_rpc::RaftRequest; 14 | use toshi_types::{AddDocument, Error, IndexHandle}; 15 | 16 | use crate::proposal::Proposal; 17 | use crate::rpc_utils::create_client; 18 | 19 | pub mod handle; 20 | pub mod proposal; 21 | pub mod raft_io; 22 | pub mod rpc_server; 23 | pub mod rpc_utils; 24 | 25 | pub type BoxErr = Box; 26 | pub type Result = std::result::Result; 27 | 28 | pub async fn run( 29 | mut raft_group: RawNode, 30 | proposals: Arc>>, 31 | pending_messages: Arc>>, 32 | nodes: Arc>, 33 | ) -> Result<()> 34 | where 35 | H: Storage + IndexHandle + Send + Sync, 36 | { 37 | let mut t = Instant::now(); 38 | loop { 39 | let pending_messages = Arc::clone(&pending_messages); 40 | let mut pending = pending_messages.write().await; 41 | let proposals = Arc::clone(&proposals); 42 | let mut props = proposals.write().await; 43 | if !pending.is_empty() { 44 | if let Some(msg) = pending.pop() { 45 | step(msg, &mut raft_group, Arc::clone(&pending_messages)).await?; 46 | } 47 | } 48 | 49 | if t.elapsed() >= Duration::from_millis(100) { 50 | // Tick the raft. 51 | raft_group.tick(); 52 | t = Instant::now(); 53 | } 54 | 55 | // Let the leader pick pending proposals from the global queue. 
56 | if raft_group.raft.state == StateRole::Leader { 57 | let p: Proposal = props.recv().await.unwrap(); 58 | propose(p, &mut raft_group) 59 | } 60 | 61 | // Handle readies from the raft. 62 | 63 | on_ready(&mut raft_group, Arc::clone(&nodes), Arc::clone(&proposals)).await?; 64 | } 65 | } 66 | 67 | pub async fn on_ready( 68 | raft_group: &mut RawNode, 69 | nodes: Arc>, 70 | proposals: Arc>>, 71 | ) -> Result<()> 72 | where 73 | H: Storage + IndexHandle + Send + Sync, 74 | { 75 | if !raft_group.has_ready() { 76 | return Ok(()); 77 | } 78 | let mut ready = raft_group.ready(); 79 | 80 | handle_messages(ready.take_messages(), Arc::clone(&nodes)).await?; 81 | 82 | // Apply the snapshot. It's necessary because in `RawNode::advance` we stabilize the snapshot. 83 | if *ready.snapshot() != Snapshot::default() { 84 | let _s = ready.snapshot().clone(); 85 | // Apply the snapshot here. 86 | } 87 | 88 | handle_committed(ready.take_committed_entries(), raft_group, Arc::clone(&proposals)).await?; 89 | 90 | for msg in ready.entries() { 91 | let add = AddDocument::new(serde_json::from_slice(&msg.data)?, None); 92 | raft_group.store().add_document(add).await?; 93 | } 94 | raft_group.store().commit().await?; 95 | 96 | let mut light_rd = raft_group.advance(ready); 97 | handle_messages(light_rd.take_messages(), Arc::clone(&nodes)).await?; 98 | handle_committed(light_rd.take_committed_entries(), raft_group, Arc::clone(&proposals)).await?; 99 | // Call `RawNode::advance` interface to update position flags in the raft. 
100 | raft_group.advance_apply(); 101 | Ok(()) 102 | } 103 | 104 | pub async fn handle_messages(msgs: Vec, nodes: Arc>) -> Result<()> { 105 | for msg in msgs { 106 | let to = msg.to; 107 | let node = nodes 108 | .get(&to.to_string()) 109 | .ok_or_else(|| Error::RPCError(format!("Unable to get node for: {}", &to)))?; 110 | 111 | let mut client = create_client(&node, None).await?; 112 | let req = RaftRequest { message: Some(msg) }; 113 | client.raft_request(req).await?; 114 | } 115 | 116 | Ok(()) 117 | } 118 | 119 | pub async fn handle_committed(entries: Vec, raft_group: &mut RawNode, proposals: Arc>>) -> Result<()> 120 | where 121 | H: Storage + IndexHandle + Send + Sync, 122 | { 123 | for entry in entries { 124 | if entry.data.is_empty() { 125 | // From new elected leaders. 126 | continue; 127 | } 128 | if let EntryType::EntryConfChange = entry.get_entry_type() { 129 | // For conf change messages, make them effective. 130 | let mut cc = ConfChange::default(); 131 | cc.merge(&*entry.data)?; 132 | raft_group.apply_conf_change(&cc)?; 133 | } else { 134 | let doc = serde_json::from_slice(&entry.data)?; 135 | raft_group.store().add_document(AddDocument::new(doc, None)).await?; 136 | } 137 | if raft_group.raft.leader_id == raft_group.raft.id { 138 | // The leader should response to the clients, tell them if their proposals 139 | // succeeded or not. 140 | 141 | let prop = proposals.write().await.recv().await.unwrap(); 142 | prop.propose_success.send(true).unwrap(); 143 | } 144 | } 145 | Ok(()) 146 | } 147 | 148 | pub async fn step(mut m: RaftMessage, raft_group: &mut RawNode, pending_messages: Arc>>) -> Result<()> 149 | where 150 | H: Storage + IndexHandle + Send + Sync, 151 | { 152 | // Here we hold up MsgReadIndex. If current peer has valid lease, then we could handle the 153 | // request directly, rather than send a heartbeat to check quorum. 
154 | let msg_type = m.get_msg_type(); 155 | let committed = raft_group.raft.raft_log.committed; 156 | let expected_term = raft_group.raft.raft_log.term(committed).unwrap_or(1); 157 | if msg_type == MessageType::MsgReadIndex && expected_term == raft_group.status().hs.term { 158 | // If the leader hasn't committed any entries in its term, it can't response read only 159 | // requests. Please also take a look at raft-rs. 160 | 161 | let mut resp = RaftMessage::default(); 162 | resp.set_msg_type(MessageType::MsgReadIndexResp); 163 | resp.term = raft_group.raft.term; 164 | resp.to = m.from; 165 | resp.index = raft_group.store().get_opstamp() as u64; 166 | resp.set_entries(m.take_entries()); 167 | let mut pending = pending_messages.write().await; 168 | pending.push(resp); 169 | return Ok(()); 170 | } 171 | if msg_type == MessageType::MsgTransferLeader { 172 | execute_transfer_leader(&m, raft_group); 173 | return Ok(()); 174 | } 175 | 176 | raft_group.step(m)?; 177 | Ok(()) 178 | } 179 | 180 | pub fn execute_transfer_leader(msg: &RaftMessage, raft_group: &mut RawNode) 181 | where 182 | H: Storage + IndexHandle + Send + Sync, 183 | { 184 | if msg.get_log_term() != raft_group.raft.term { 185 | return; 186 | } 187 | 188 | if raft_group.raft.leader_id == raft_group.raft.id { 189 | let from = msg.get_from(); 190 | raft_group.transfer_leader(from); 191 | return; 192 | } 193 | 194 | let mut msg = RaftMessage::default(); 195 | msg.set_from(raft_group.raft.id); 196 | msg.set_to(raft_group.raft.leader_id); 197 | msg.set_msg_type(MessageType::MsgTransferLeader); 198 | msg.set_index(raft_group.store().get_opstamp() as u64); 199 | msg.set_log_term(raft_group.raft.term); 200 | raft_group.raft.msgs.push(msg); 201 | } 202 | 203 | fn propose(mut proposal: Proposal, raft_group: &mut RawNode) 204 | where 205 | H: Storage + IndexHandle + Send + Sync, 206 | { 207 | let last_index1 = raft_group.raft.raft_log.last_index() + 1; 208 | if let Some(ref data) = proposal.normal { 209 | let _ = 
raft_group.propose(vec![], data.to_vec()); 210 | } else if let Some(ref cc) = proposal.conf_change { 211 | let _ = raft_group.propose_conf_change(vec![], cc.clone()); 212 | } else if let Some(_transferee) = proposal.transfer_leader { 213 | // TODO: implement transfer leader. 214 | unimplemented!(); 215 | } 216 | 217 | let last_index2 = raft_group.raft.raft_log.last_index() + 1; 218 | if last_index2 == last_index1 { 219 | // Propose failed, don't forget to respond to the client. 220 | proposal.propose_success.send(true).unwrap(); 221 | } else { 222 | proposal.proposed = last_index1; 223 | } 224 | } 225 | -------------------------------------------------------------------------------- /toshi-server/src/handlers/search.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use hyper::body::to_bytes; 4 | use hyper::Response; 5 | use hyper::{Body, StatusCode}; 6 | use log::info; 7 | 8 | use toshi_types::*; 9 | 10 | use crate::handlers::ResponseFuture; 11 | use crate::utils::{empty_with_code, with_body}; 12 | 13 | pub async fn doc_search(catalog: Arc, body: Body, index: &str) -> ResponseFuture { 14 | let b = to_bytes(body).await?; 15 | match serde_json::from_slice::(&b) { 16 | Ok(req) => { 17 | let req = if req.query.is_none() { Search::all_limit(req.limit) } else { req }; 18 | if catalog.exists(index) { 19 | info!("Query: {:?}", req); 20 | let index = catalog.get_index(index).unwrap(); // If this unwrap fails, this is a bug. 
21 | match index.search_index(req).await { 22 | Ok(results) => Ok(with_body(results)), 23 | Err(e) => Ok(Response::from(e)), 24 | } 25 | } else { 26 | Ok(empty_with_code(StatusCode::NOT_FOUND)) 27 | } 28 | } 29 | Err(err) => Ok(Response::from(Error::QueryError(format!("Bad JSON Query: {}", err)))), 30 | } 31 | } 32 | 33 | pub async fn all_docs(catalog: Arc, index: &str) -> ResponseFuture { 34 | let body = Body::from(serde_json::to_vec(&Search::all_docs()).unwrap()); 35 | doc_search(catalog, body, index).await 36 | } 37 | 38 | #[cfg(test)] 39 | pub mod tests { 40 | use std::sync::Arc; 41 | 42 | use hyper::Body; 43 | use pretty_assertions::assert_eq; 44 | 45 | use toshi_types::{ErrorResponse, ExactTerm, FuzzyQuery, FuzzyTerm, KeyValue, PhraseQuery, Query, Search, TermPair}; 46 | 47 | use crate::commit::tests::*; 48 | use crate::handlers::{doc_search, ResponseFuture}; 49 | use crate::index::create_test_catalog; 50 | use crate::SearchResults; 51 | 52 | type ReturnUnit = Result<(), Box>; 53 | 54 | pub async fn run_query(req: Search, index: &str) -> ResponseFuture { 55 | let cat = create_test_catalog(index); 56 | doc_search(Arc::clone(&cat), Body::from(serde_json::to_vec(&req).unwrap()), index).await 57 | } 58 | 59 | #[tokio::test] 60 | async fn test_term_query() -> Result<(), Box> { 61 | let term = KeyValue::new("test_text".into(), "document".into()); 62 | let term_query = Query::Exact(ExactTerm::new(term)); 63 | let search = Search::new(Some(term_query), None, 10, None); 64 | let q = run_query(search, "test_index").await?; 65 | let body: SearchResults = wait_json(q).await; 66 | assert_eq!(body.hits, 3); 67 | Ok(()) 68 | } 69 | 70 | #[tokio::test] 71 | async fn test_phrase_query() -> Result<(), Box> { 72 | let terms = TermPair::new(vec!["test".into(), "document".into()], None); 73 | let phrase = KeyValue::new("test_text".into(), terms); 74 | let term_query = Query::Phrase(PhraseQuery::new(phrase)); 75 | let search = Search::new(Some(term_query), None, 10, None); 76 | 
let q = run_query(search, "test_index").await?; 77 | let body: SearchResults = wait_json(q).await; 78 | assert_eq!(body.hits, 3); 79 | Ok(()) 80 | } 81 | 82 | #[tokio::test] 83 | async fn test_bad_raw_query_syntax() -> ReturnUnit { 84 | let cat = create_test_catalog("test_index"); 85 | let body = r#"{ "query" : { "raw": "asd*(@sq__" } }"#; 86 | let err = doc_search(Arc::clone(&cat), Body::from(body), "test_index").await?; 87 | let body: ErrorResponse = wait_json::(err).await; 88 | assert_eq!(body.message, "Error in Index: \'Syntax Error: asd*(@sq__\'"); 89 | Ok(()) 90 | } 91 | 92 | #[tokio::test] 93 | async fn test_unindexed_field() -> ReturnUnit { 94 | let cat = create_test_catalog("test_index"); 95 | let body = r#"{ "query" : { "raw": "test_unindex:yes" } }"#; 96 | let r = doc_search(Arc::clone(&cat), Body::from(body), "test_index").await?; 97 | let b = read_body(r).await?; 98 | let expected = r#"{"message":"Error in Index: 'The field 'test_unindex' is not declared as indexed'"}"#; 99 | assert_eq!(b, expected); 100 | Ok(()) 101 | } 102 | 103 | #[tokio::test] 104 | async fn test_bad_term_field_syntax() -> ReturnUnit { 105 | let cat = create_test_catalog("test_index"); 106 | let body = r#"{ "query" : { "term": { "asdf": "Document" } } }"#; 107 | let q = doc_search(Arc::clone(&cat), Body::from(body), "test_index").await?; 108 | let b: ErrorResponse = wait_json(q).await; 109 | assert_eq!(b.message, "Error in query execution: 'Unknown field: asdf'"); 110 | Ok(()) 111 | } 112 | 113 | #[tokio::test] 114 | async fn test_facets() -> ReturnUnit { 115 | let body = r#"{ "query" : { "term": { "test_text": "document" } }, "facets": { "test_facet": ["/cat"] } }"#; 116 | let req: Search = serde_json::from_str(body)?; 117 | let q = run_query(req, "test_index").await?; 118 | let b: SearchResults = wait_json(q).await; 119 | assert_eq!(b.get_facets()[0].value, 1); 120 | assert_eq!(b.get_facets()[1].value, 1); 121 | assert_eq!(b.get_facets()[0].field, "/cat/cat2"); 122 | Ok(()) 123 | 
} 124 | 125 | // This code is just...the worst thing ever. 126 | #[tokio::test] 127 | async fn test_raw_query() -> ReturnUnit { 128 | let b = r#"test_text:"Duckiment""#; 129 | let req = Search::new(Some(Query::Raw { raw: b.into() }), None, 10, None); 130 | let q = run_query(req, "test_index").await?; 131 | let body: SearchResults = wait_json(q).await; 132 | assert_eq!(body.hits as usize, body.get_docs().len()); 133 | let b2 = body; 134 | let map = b2.get_docs()[0].clone().doc.0; 135 | let text = String::from(map.remove("test_text").unwrap().1.as_str().unwrap()); 136 | assert_eq!(text, "Test Duckiment 3"); 137 | Ok(()) 138 | } 139 | 140 | #[tokio::test] 141 | async fn test_fuzzy_term_query() -> ReturnUnit { 142 | let fuzzy = KeyValue::new("test_text".into(), FuzzyTerm::new("document".into(), 0, false)); 143 | let term_query = Query::Fuzzy(FuzzyQuery::new(fuzzy)); 144 | let search = Search::new(Some(term_query), None, 10, None); 145 | let q = run_query(search, "test_index").await?; 146 | let body: SearchResults = wait_json(q).await; 147 | 148 | assert_eq!(body.hits as usize, body.get_docs().len()); 149 | assert_eq!(body.hits, 3); 150 | assert_eq!(body.get_docs().len(), 3); 151 | Ok(()) 152 | } 153 | 154 | #[tokio::test] 155 | async fn test_inclusive_range_query() -> ReturnUnit { 156 | let body = r#"{ "query" : { "range" : { "test_i64" : { "gte" : 2012, "lte" : 2015 } } } }"#; 157 | let req: Search = serde_json::from_str(body)?; 158 | let q = run_query(req, "test_index").await?; 159 | let body: SearchResults = wait_json(q).await; 160 | assert_eq!(body.hits as usize, body.get_docs().len()); 161 | assert!(cmp_float(body.get_docs()[0].score.unwrap(), 1.0)); 162 | Ok(()) 163 | } 164 | 165 | #[tokio::test] 166 | async fn test_exclusive_range_query() -> ReturnUnit { 167 | let body = r#"{ "query" : { "range" : { "test_i64" : { "gt" : 2012, "lt" : 2015 } } } }"#; 168 | let req: Search = serde_json::from_str(body)?; 169 | let q = run_query(req, "test_index").await?; 170 | let 
body: SearchResults = wait_json(q).await; 171 | assert_eq!(body.hits as usize, body.get_docs().len()); 172 | assert!(cmp_float(body.get_docs()[0].score.unwrap(), 1.0)); 173 | Ok(()) 174 | } 175 | 176 | #[tokio::test] 177 | async fn test_regex_query() -> ReturnUnit { 178 | let body = r#"{ "query" : { "regex" : { "test_text" : "d[ou]{1}c[k]?ument" } } }"#; 179 | let req: Search = serde_json::from_str(body)?; 180 | let q = run_query(req, "test_index").await?; 181 | let body: SearchResults = wait_json(q).await; 182 | assert_eq!(body.hits, 4); 183 | Ok(()) 184 | } 185 | 186 | #[tokio::test] 187 | async fn test_bool_query() -> ReturnUnit { 188 | let test_json = r#"{"query": { "bool": { 189 | "must": [ { "term": { "test_text": "document" } } ], 190 | "must_not": [ {"range": {"test_i64": { "gt": 2017 } } } ] } } }"#; 191 | 192 | let query = serde_json::from_str::(test_json)?; 193 | let q = run_query(query, "test_index").await?; 194 | let body: SearchResults = wait_json(q).await; 195 | assert_eq!(body.hits, 2); 196 | Ok(()) 197 | } 198 | } 199 | -------------------------------------------------------------------------------- /toshi-server/src/settings.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | 3 | use config::{Config, ConfigError, File, FileFormat, Source}; 4 | use serde::Deserialize; 5 | use structopt::StructOpt; 6 | use tantivy::merge_policy::*; 7 | 8 | pub const VERSION: &str = env!("CARGO_PKG_VERSION"); 9 | 10 | pub const HEADER: &str = r#" 11 | ______ __ _ ____ __ 12 | /_ __/__ ___ / / (_) / __/__ ___ _________/ / 13 | / / / _ \(_- = Vec::new(); 45 | pub const DEFAULT_ID: u64 = 1; 46 | pub const DEFAULT_RPC_PORT: u16 = 8081; 47 | pub const DEFAULT_LEVEL_LOG_SIZE: f64 = 0.75; 48 | pub const DEFAULT_MIN_LAYER_SIZE: u32 = 10_000; 49 | pub const DEFAULT_MIN_MERGE_SIZE: usize = 8; 50 | 51 | pub fn default_merge_policy() -> ConfigMergePolicy { 52 | ConfigMergePolicy { 53 | kind: "log".to_string(), 54 | 
min_merge_size: DEFAULT_MIN_MERGE_SIZE, 55 | min_layer_size: DEFAULT_MIN_LAYER_SIZE, 56 | level_log_size: DEFAULT_LEVEL_LOG_SIZE, 57 | } 58 | } 59 | 60 | pub fn settings() -> Settings { 61 | let options = Settings::from_args(); 62 | if !&options.config.is_empty() { 63 | Settings::new(&options.config).expect("Invalid Configuration File") 64 | } else { 65 | options 66 | } 67 | } 68 | 69 | #[derive(Deserialize, Clone, Debug, StructOpt)] 70 | #[serde(default = "ConfigMergePolicy::default")] 71 | pub struct ConfigMergePolicy { 72 | #[structopt(long, default_value = "log")] 73 | kind: String, 74 | #[structopt(long, default_value)] 75 | min_merge_size: usize, 76 | #[structopt(long, default_value)] 77 | min_layer_size: u32, 78 | #[structopt(long, default_value)] 79 | level_log_size: f64, 80 | } 81 | 82 | impl Default for ConfigMergePolicy { 83 | fn default() -> Self { 84 | Self { 85 | kind: "log".into(), 86 | min_merge_size: DEFAULT_MIN_MERGE_SIZE, 87 | min_layer_size: DEFAULT_MIN_LAYER_SIZE, 88 | level_log_size: DEFAULT_LEVEL_LOG_SIZE, 89 | } 90 | } 91 | } 92 | 93 | impl ConfigMergePolicy { 94 | pub fn get_kind(&self) -> MergePolicyType { 95 | match self.kind.to_ascii_lowercase().as_ref() { 96 | "log" => MergePolicyType::Log, 97 | "nomerge" => MergePolicyType::NoMerge, 98 | _ => panic!("Unknown Merge Typed Defined"), 99 | } 100 | } 101 | } 102 | 103 | #[derive(Deserialize, Clone, Debug, StructOpt, Default)] 104 | pub struct Experimental { 105 | #[structopt(long)] 106 | pub leader: bool, 107 | #[structopt(long)] 108 | pub nodes: Vec, 109 | #[structopt(long, default_value = "1")] 110 | pub id: u64, 111 | #[structopt(long, default_value = "8081")] 112 | pub rpc_port: u16, 113 | } 114 | 115 | #[derive(Deserialize, Clone, Debug, StructOpt)] 116 | #[structopt(name = "toshi", version = env!("CARGO_PKG_VERSION"))] 117 | #[serde(default = "Settings::default")] 118 | pub struct Settings { 119 | #[serde(skip)] 120 | #[structopt(short, long, default_value = "config/config.toml")] 121 
| pub config: String, 122 | #[structopt(short, long, default_value = "127.0.0.1")] 123 | pub host: String, 124 | #[structopt(short, long, default_value = "8080")] 125 | pub port: u16, 126 | #[structopt(short = "P", long, default_value = "data/")] 127 | pub path: String, 128 | #[structopt(short, long, default_value = "info")] 129 | pub log_level: String, 130 | #[structopt(short, long, default_value = "200000000")] 131 | pub writer_memory: usize, 132 | #[structopt(short, long, default_value = "4")] 133 | pub json_parsing_threads: usize, 134 | #[structopt(short, long, default_value = "5")] 135 | pub auto_commit_duration: f32, 136 | #[structopt(short, long, default_value = "10000")] 137 | pub bulk_buffer_size: usize, 138 | #[structopt(short, long, default_value = "10000")] 139 | pub max_line_length: usize, 140 | #[structopt(flatten)] 141 | pub merge_policy: ConfigMergePolicy, 142 | #[structopt(short, long)] 143 | pub experimental: bool, 144 | #[structopt(flatten)] 145 | #[serde(default = "Experimental::default")] 146 | pub experimental_features: Experimental, 147 | } 148 | 149 | impl Default for Settings { 150 | fn default() -> Self { 151 | Self { 152 | config: "config/config.toml".into(), 153 | host: DEFAULT_HOST.into(), 154 | port: DEFAULT_PORT, 155 | path: DEFAULT_PATH.into(), 156 | log_level: DEFAULT_LEVEL.into(), 157 | writer_memory: DEFAULT_WRITER_MEMORY, 158 | json_parsing_threads: DEFAULT_JSON_PARSING_THREADS, 159 | auto_commit_duration: DEFAULT_AUTO_COMMIT_DURATION, 160 | bulk_buffer_size: DEFAULT_BULK_BUFFER_SIZE, 161 | max_line_length: DEFAULT_MAX_LINE_LENGTH, 162 | merge_policy: ConfigMergePolicy::default(), 163 | experimental: false, 164 | experimental_features: Experimental::default(), 165 | } 166 | } 167 | } 168 | 169 | impl FromStr for Settings { 170 | type Err = ConfigError; 171 | 172 | fn from_str(cfg: &str) -> Result { 173 | Self::from_config(File::from_str(cfg, FileFormat::Toml)) 174 | } 175 | } 176 | 177 | impl Settings { 178 | pub fn new(path: 
&str) -> Result { 179 | Self::from_config(File::with_name(path)) 180 | } 181 | 182 | pub fn from_config(c: T) -> Result { 183 | Config::builder().add_source(c).build()?.try_deserialize::() 184 | } 185 | 186 | pub fn get_nodes(&self) -> Vec { 187 | self.experimental_features.nodes.clone() 188 | } 189 | 190 | pub fn get_merge_policy(&self) -> Box { 191 | match self.merge_policy.get_kind() { 192 | MergePolicyType::Log => { 193 | let mut mp = LogMergePolicy::default(); 194 | mp.set_level_log_size(self.merge_policy.level_log_size); 195 | mp.set_min_layer_size(self.merge_policy.min_layer_size); 196 | mp.set_max_docs_before_merge(self.merge_policy.min_merge_size); 197 | Box::new(mp) 198 | } 199 | MergePolicyType::NoMerge => Box::new(NoMergePolicy::default()), 200 | } 201 | } 202 | } 203 | 204 | #[cfg(test)] 205 | mod tests { 206 | use crate::commit::tests::cmp_float; 207 | 208 | use super::*; 209 | 210 | #[test] 211 | fn valid_default_config() { 212 | let default = Settings::default(); 213 | assert_eq!(default.host, "0.0.0.0"); 214 | assert_eq!(default.port, 8080); 215 | assert_eq!(default.path, "data/"); 216 | assert_eq!(default.writer_memory, 200_000_000); 217 | assert_eq!(default.log_level, "info"); 218 | assert_eq!(default.json_parsing_threads, 4); 219 | assert_eq!(default.bulk_buffer_size, 10000); 220 | assert_eq!(default.max_line_length, 10000); 221 | assert_eq!(default.merge_policy.kind, "log"); 222 | assert!(cmp_float(default.merge_policy.level_log_size as f32, 0.75)); 223 | assert_eq!(default.merge_policy.min_layer_size, 10_000); 224 | assert_eq!(default.merge_policy.min_merge_size, 8); 225 | assert!(!default.experimental); 226 | assert!(!default.experimental_features.leader); 227 | } 228 | 229 | #[test] 230 | fn valid_merge_policy() { 231 | let cfg = r#" 232 | host = "asdf:8080" 233 | [merge_policy] 234 | kind = "log" 235 | level_log_size = 10.5 236 | min_layer_size = 20 237 | min_merge_size = 30"#; 238 | 239 | let config = Settings::from_str(cfg).unwrap(); 240 
| assert!(cmp_float(config.merge_policy.level_log_size as f32, 10.5)); 241 | assert_eq!(config.merge_policy.min_layer_size, 20); 242 | assert_eq!(config.merge_policy.min_merge_size, 30); 243 | } 244 | 245 | #[test] 246 | fn valid_no_merge_policy() { 247 | let cfg = r#" 248 | [merge_policy] 249 | kind = "nomerge""#; 250 | 251 | let config = Settings::from_str(cfg).unwrap(); 252 | 253 | assert!(config.merge_policy.get_kind() == MergePolicyType::NoMerge); 254 | assert_eq!(config.merge_policy.kind, "nomerge"); 255 | assert!(cmp_float(config.merge_policy.level_log_size as f32, 0.75)); 256 | assert_eq!(config.merge_policy.min_layer_size, 10_000); 257 | assert_eq!(config.merge_policy.min_merge_size, 8); 258 | } 259 | 260 | #[test] 261 | #[should_panic] 262 | fn bad_config_file() { 263 | Settings::new("asdf/casdf").unwrap(); 264 | } 265 | 266 | #[test] 267 | #[should_panic] 268 | fn bad_merge_type() { 269 | let cfg = r#" 270 | [merge_policy] 271 | kind = "asdf1234""#; 272 | 273 | let config = Settings::from_str(cfg).unwrap(); 274 | config.get_merge_policy(); 275 | } 276 | } 277 | -------------------------------------------------------------------------------- /toshi-server/src/handle.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | use std::path::PathBuf; 3 | use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; 4 | use std::sync::Arc; 5 | 6 | use async_trait::async_trait; 7 | use log::*; 8 | use tantivy::collector::{FacetCollector, MultiCollector, TopDocs}; 9 | use tantivy::directory::MmapDirectory; 10 | use tantivy::merge_policy::MergePolicy; 11 | use tantivy::query::{AllQuery, QueryParser}; 12 | use tantivy::schema::*; 13 | use tantivy::space_usage::SearcherSpaceUsage; 14 | use tantivy::{Document, Index, IndexReader, IndexWriter, ReloadPolicy, Term}; 15 | use tokio::sync::*; 16 | 17 | use toshi_types::*; 18 | 19 | use crate::settings::{Settings, DEFAULT_WRITER_MEMORY}; 20 | use crate::{register_tokenizers, 
Result}; 21 | use crate::{AddDocument, SearchResults}; 22 | 23 | /// Index handle that operates on an Index local to the node, a remote index handle 24 | /// will eventually call to wherever the local index is stored, so at some level the relevant 25 | /// local handle will always get called through rpc 26 | #[derive(Clone)] 27 | pub struct LocalIndex { 28 | index: Index, 29 | writer: Arc>, 30 | reader: IndexReader, 31 | current_opstamp: Arc, 32 | deleted_docs: Arc, 33 | name: String, 34 | } 35 | 36 | impl PartialEq for LocalIndex { 37 | fn eq(&self, other: &LocalIndex) -> bool { 38 | self.name == *other.name 39 | } 40 | } 41 | 42 | impl Eq for LocalIndex {} 43 | 44 | #[async_trait] 45 | impl IndexHandle for LocalIndex { 46 | fn get_name(&self) -> String { 47 | self.name.clone() 48 | } 49 | 50 | fn get_index(&self) -> Index { 51 | self.index.clone() 52 | } 53 | 54 | fn get_writer(&self) -> Arc> { 55 | Arc::clone(&self.writer) 56 | } 57 | 58 | fn get_space(&self) -> SearcherSpaceUsage { 59 | self.reader.searcher().space_usage().unwrap() 60 | } 61 | 62 | fn get_opstamp(&self) -> usize { 63 | trace!("Got the opstamp"); 64 | self.current_opstamp.load(Ordering::SeqCst) 65 | } 66 | 67 | fn set_opstamp(&self, opstamp: usize) { 68 | trace!("Setting stamp to {}", opstamp); 69 | self.current_opstamp.store(opstamp, Ordering::SeqCst) 70 | } 71 | 72 | async fn commit(&self) -> Result { 73 | let mut lock = self.writer.lock().await; 74 | Ok(lock.commit()?) 
75 | } 76 | 77 | async fn search_index(&self, search: Search) -> Result { 78 | let searcher = self.reader.searcher(); 79 | let schema = self.index.schema(); 80 | let mut multi_collector = MultiCollector::new(); 81 | 82 | let sorted_top_handle = search.sort_by.clone().and_then(|sort_by| { 83 | info!("Sorting with: {}", sort_by); 84 | if let Some(f) = schema.get_field(&sort_by) { 85 | let entry = schema.get_field_entry(f); 86 | if entry.is_fast() && entry.is_stored() { 87 | let c = TopDocs::with_limit(search.limit).order_by_u64_field(f); 88 | return Some(multi_collector.add_collector(c)); 89 | } 90 | } 91 | None 92 | }); 93 | 94 | let top_handle = multi_collector.add_collector(TopDocs::with_limit(search.limit)); 95 | let facet_handle = search.facets.clone().and_then(|f| { 96 | if let Some(field) = schema.get_field(f.get_facets_fields()) { 97 | let mut col = FacetCollector::for_field(field); 98 | for term in f.get_facets_values() { 99 | col.add_facet(&term); 100 | } 101 | Some(multi_collector.add_collector(col)) 102 | } else { 103 | None 104 | } 105 | }); 106 | 107 | if let Some(query) = search.query { 108 | let gen_query = match query { 109 | Query::Regex(regex) => regex.create_query(&schema)?, 110 | Query::Phrase(phrase) => phrase.create_query(&schema)?, 111 | Query::Fuzzy(fuzzy) => fuzzy.create_query(&schema)?, 112 | Query::Exact(term) => term.create_query(&schema)?, 113 | Query::Range(range) => range.create_query(&schema)?, 114 | Query::Boolean { bool } => bool.create_query(&schema)?, 115 | Query::Raw { raw } => { 116 | let fields: Vec = schema.fields().filter_map(|f| schema.get_field(f.1.name())).collect(); 117 | let query_parser = QueryParser::for_index(&self.index, fields); 118 | query_parser.parse_query(&raw)? 
119 | } 120 | Query::All => Box::new(AllQuery), 121 | }; 122 | 123 | trace!("{:?}", gen_query); 124 | let mut scored_docs = searcher.search(&*gen_query, &multi_collector)?; 125 | 126 | // FruitHandle isn't a public type which leads to some duplicate code like this. 127 | let docs: Vec> = if let Some(h) = sorted_top_handle { 128 | h.extract(&mut scored_docs) 129 | .into_iter() 130 | .map(|(score, doc)| { 131 | let d = searcher.doc(doc).expect("Doc not found in segment"); 132 | ScoredDoc::::new(Some(score as f32), schema.to_named_doc(&d).into()) 133 | }) 134 | .collect() 135 | } else { 136 | top_handle 137 | .extract(&mut scored_docs) 138 | .into_iter() 139 | .map(|(score, doc)| { 140 | let d = searcher.doc(doc).expect("Doc not found in segment"); 141 | ScoredDoc::::new(Some(score), schema.to_named_doc(&d).into()) 142 | }) 143 | .collect() 144 | }; 145 | 146 | if let Some(facets) = facet_handle { 147 | if let Some(t) = &search.facets { 148 | let facet_counts = facets 149 | .extract(&mut scored_docs) 150 | .get(&t.get_facets_values()[0]) 151 | .map(|(f, c)| KeyValue::new(f.to_string(), c)) 152 | .collect(); 153 | return Ok(SearchResults::with_facets(docs, facet_counts)); 154 | } 155 | } 156 | Ok(SearchResults::new(docs)) 157 | } else { 158 | Err(Error::QueryError("Empty Query Provided".into())) 159 | } 160 | } 161 | 162 | async fn add_document(&self, add_doc: AddDocument) -> Result<()> { 163 | let index_schema = self.index.schema(); 164 | let writer_lock = self.get_writer(); 165 | { 166 | let index_writer = writer_lock.lock().await; 167 | let doc: Document = LocalIndex::parse_doc(&index_schema, &add_doc.document.to_string())?; 168 | index_writer.add_document(doc)?; 169 | } 170 | if let Some(opts) = add_doc.options { 171 | if opts.commit { 172 | let mut commit_writer = writer_lock.lock().await; 173 | commit_writer.commit()?; 174 | self.set_opstamp(0); 175 | } else { 176 | self.set_opstamp(self.get_opstamp() + 1); 177 | } 178 | } else { 179 | 
self.set_opstamp(self.get_opstamp() + 1); 180 | } 181 | Ok(()) 182 | } 183 | 184 | async fn delete_term(&self, term: DeleteDoc) -> Result { 185 | let index_schema = self.index.schema(); 186 | let writer_lock = self.get_writer(); 187 | let before: u64; 188 | { 189 | let index_writer = writer_lock.lock().await; 190 | before = self.reader.searcher().num_docs(); 191 | 192 | for (field, value) in term.terms { 193 | if let Some(f) = index_schema.get_field(&field) { 194 | let term = Term::from_field_text(f, &value); 195 | index_writer.delete_term(term); 196 | } 197 | } 198 | } 199 | if let Some(opts) = term.options { 200 | if opts.commit { 201 | let mut commit_writer = writer_lock.lock().await; 202 | commit_writer.commit()?; 203 | self.set_opstamp(0); 204 | } 205 | } 206 | let docs_affected = before - self.reader.searcher().num_docs(); 207 | let current = self.deleted_docs.load(Ordering::SeqCst); 208 | self.deleted_docs.store(current + docs_affected, Ordering::SeqCst); 209 | Ok(DocsAffected { docs_affected }) 210 | } 211 | } 212 | 213 | impl LocalIndex { 214 | pub fn new( 215 | mut base_path: PathBuf, 216 | index_name: &str, 217 | schema: Schema, 218 | writer_memory: usize, 219 | merge_policy: Box, 220 | ) -> Result { 221 | base_path.push(index_name); 222 | if !base_path.exists() { 223 | fs::create_dir(&base_path)?; 224 | } 225 | let dir = MmapDirectory::open(base_path)?; 226 | let index = Index::open_or_create(dir, schema)?; 227 | let index = register_tokenizers(index); 228 | let i = index.writer(writer_memory)?; 229 | i.set_merge_policy(merge_policy); 230 | let current_opstamp = Arc::new(AtomicUsize::new(0)); 231 | let writer = Arc::new(Mutex::new(i)); 232 | let reader = index.reader_builder().reload_policy(ReloadPolicy::OnCommit).try_into()?; 233 | Ok(Self { 234 | index, 235 | reader, 236 | writer, 237 | current_opstamp, 238 | deleted_docs: Arc::new(AtomicU64::new(0)), 239 | name: index_name.into(), 240 | }) 241 | } 242 | 243 | pub(crate) fn from_existing(name: String, 
index: Index) -> Result { 244 | let i = index.writer(DEFAULT_WRITER_MEMORY)?; 245 | i.set_merge_policy(Settings::default().get_merge_policy()); 246 | let current_opstamp = Arc::new(AtomicUsize::new(0)); 247 | let writer = Arc::new(Mutex::new(i)); 248 | let reader = index.reader_builder().reload_policy(ReloadPolicy::OnCommit).try_into()?; 249 | Ok(Self { 250 | index, 251 | reader, 252 | writer, 253 | current_opstamp, 254 | deleted_docs: Arc::new(AtomicU64::new(0)), 255 | name, 256 | }) 257 | } 258 | 259 | fn parse_doc(schema: &Schema, bytes: &str) -> Result { 260 | schema.parse_document(bytes).map_err(Into::into) 261 | } 262 | } 263 | -------------------------------------------------------------------------------- /toshi-types/src/query/mod.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::marker::PhantomData; 3 | 4 | use dashmap::DashMap; 5 | use serde::de::{DeserializeOwned, Deserializer, Error as SerdeError, MapAccess, Visitor}; 6 | use serde::ser::SerializeMap; 7 | use serde::Serializer; 8 | use serde::{Deserialize, Serialize}; 9 | use serde_json::Value; 10 | use tantivy::query::Query as TantivyQuery; 11 | use tantivy::schema::{NamedFieldDocument, Schema}; 12 | use tantivy::Term; 13 | 14 | use crate::error::Error; 15 | use crate::query::{ 16 | boolean::BoolQuery, facet::FacetQuery, fuzzy::FuzzyQuery, phrase::PhraseQuery, range::RangeQuery, regex::RegexQuery, term::ExactTerm, 17 | }; 18 | 19 | pub(crate) mod boolean; 20 | pub(crate) mod facet; 21 | pub(crate) mod fuzzy; 22 | pub(crate) mod phrase; 23 | pub(crate) mod range; 24 | pub(crate) mod regex; 25 | pub(crate) mod term; 26 | 27 | /// Additional Options for results returned from queries 28 | #[derive(Deserialize, Debug, Default)] 29 | pub struct QueryOptions { 30 | pretty: Option, 31 | include_sizes: Option, 32 | } 33 | 34 | impl QueryOptions { 35 | /// 36 | /// 37 | /// # Arguments 38 | /// 39 | /// * `pretty`: format return JSON 40 | /// * 
`include_sizes`: include index sizes 41 | /// 42 | /// returns: QueryOptions 43 | /// 44 | pub fn new(pretty: Option, include_sizes: Option) -> Self { 45 | QueryOptions { pretty, include_sizes } 46 | } 47 | 48 | /// Include Index sizes or not 49 | #[inline] 50 | pub fn include_sizes(&self) -> bool { 51 | self.include_sizes.unwrap_or(false) 52 | } 53 | 54 | /// Format return JSON 55 | #[inline] 56 | pub fn pretty(&self) -> bool { 57 | self.pretty.unwrap_or(false) 58 | } 59 | } 60 | 61 | /// Trait that generically represents Tantivy queries 62 | pub trait CreateQuery { 63 | /// Consume the implementing struct to generate a Tantivy query 64 | fn create_query(self, schema: &Schema) -> crate::Result>; 65 | } 66 | 67 | /// The possible Tantivy Queries to issue 68 | #[derive(Serialize, Deserialize, Debug, Clone)] 69 | #[serde(untagged)] 70 | pub enum Query { 71 | /// [`tantivy::query::FuzzyQuery`]: FuzzyQuery 72 | Fuzzy(FuzzyQuery), 73 | /// [`tantivy::query::TermQuery`]: TermQuery 74 | Exact(ExactTerm), 75 | /// [`tantivy::query::PhraseQuery`]: PhraseQuery 76 | Phrase(PhraseQuery), 77 | /// [`tantivy::query::RegexQuery`]: RegexQuery 78 | Regex(RegexQuery), 79 | /// [`tantivy::query::RangeQuery`]: RangeQuery 80 | Range(RangeQuery), 81 | /// [`tantivy::query::BooleanQuery`]: BooleanQuery 82 | Boolean { 83 | /// Collection of boolean clauses 84 | bool: BoolQuery, 85 | }, 86 | /// Raw is a query that passes by the query parser and is just executed directly against the index 87 | Raw { 88 | /// The actual query to be ran 89 | raw: String, 90 | }, 91 | /// [`tantivy::query::AllQuery`]: AllQuery 92 | All, 93 | } 94 | 95 | /// Boolean gets it's own special From impl due to not being a tuple query. 96 | impl From for Query { 97 | fn from(bool: BoolQuery) -> Self { 98 | Query::Boolean { bool } 99 | } 100 | } 101 | 102 | macro_rules! to_query { ($($t:tt $e:ident),+) => { $(impl From<$t> for Query { fn from(q: $t) -> Self { Query::$e(q) } })* }; } 103 | to_query! 
{ PhraseQuery Phrase, FuzzyQuery Fuzzy, ExactTerm Exact, RegexQuery Regex, RangeQuery Range } 104 | 105 | /// The request body of a search POST in Toshi 106 | #[derive(Serialize, Deserialize, Debug, Clone)] 107 | pub struct Search { 108 | /// Optional query 109 | pub query: Option, 110 | /// Optional facets of a query 111 | pub facets: Option, 112 | /// Max number of documents to return 113 | #[serde(default = "Search::default_limit")] 114 | pub limit: usize, 115 | /// Field to sort results by 116 | #[serde(default)] 117 | pub sort_by: Option, 118 | } 119 | 120 | impl Search { 121 | /// Construct a new Search query 122 | pub fn new(query: Option, facets: Option, limit: usize, sort_by: Option) -> Self { 123 | Search { 124 | query, 125 | facets, 126 | limit, 127 | sort_by, 128 | } 129 | } 130 | 131 | /// Construct a builder to create the Search with 132 | pub fn builder() -> SearchBuilder { 133 | SearchBuilder::new() 134 | } 135 | 136 | /// Construct a search with a known Query 137 | pub fn from_query(query: Query) -> Self { 138 | Self::new(Some(query), None, Self::default_limit(), None) 139 | } 140 | 141 | /// The default limit for docs to return 142 | pub const fn default_limit() -> usize { 143 | 100 144 | } 145 | 146 | pub(crate) fn all_query() -> Query { 147 | Query::All 148 | } 149 | 150 | /// A shortcut for querying for all documents in an Index 151 | pub fn all_docs() -> Self { 152 | Self { 153 | query: Some(Self::all_query()), 154 | facets: None, 155 | limit: Self::default_limit(), 156 | sort_by: None, 157 | } 158 | } 159 | 160 | /// Another shortcut, but with a known limit 161 | pub fn all_limit(limit: usize) -> Self { 162 | let mut all = Self::all_docs(); 163 | all.limit = limit; 164 | all 165 | } 166 | } 167 | 168 | #[derive(Debug)] 169 | pub struct SearchBuilder { 170 | query: Query, 171 | facets: Option, 172 | limit: usize, 173 | sort_by: Option, 174 | } 175 | 176 | impl Default for SearchBuilder { 177 | fn default() -> Self { 178 | SearchBuilder::new() 
179 | } 180 | } 181 | 182 | impl SearchBuilder { 183 | fn new() -> Self { 184 | Self { 185 | query: Query::All, 186 | facets: None, 187 | limit: Search::default_limit(), 188 | sort_by: None, 189 | } 190 | } 191 | 192 | pub fn with_query(mut self, query: Query) -> Self { 193 | self.query = query; 194 | self 195 | } 196 | pub fn with_facets(mut self, facets: FacetQuery) -> Self { 197 | self.facets = Some(facets); 198 | self 199 | } 200 | pub fn with_limit(mut self, limit: usize) -> Self { 201 | self.limit = limit; 202 | self 203 | } 204 | pub fn sort_by(mut self, field: V) -> Self 205 | where 206 | V: ToString, 207 | { 208 | self.sort_by = Some(field.to_string()); 209 | self 210 | } 211 | pub fn build(self) -> Search { 212 | Search::new(Some(self.query), self.facets, self.limit, self.sort_by) 213 | } 214 | } 215 | 216 | #[inline] 217 | fn make_field_value(schema: &Schema, k: &str, v: &str) -> crate::Result { 218 | let field = schema 219 | .get_field(k) 220 | .ok_or_else(|| Error::QueryError(format!("Unknown field: {}", k)))?; 221 | Ok(Term::from_field_text(field, v)) 222 | } 223 | 224 | /// A single key/value pair, this struct is used when we want to accept only single key/value pairs 225 | /// for a query and a Map would not allow that. 
226 | #[derive(Debug, Clone)] 227 | pub struct KeyValue 228 | where 229 | K: DeserializeOwned, 230 | V: DeserializeOwned, 231 | { 232 | /// Key 233 | pub field: K, 234 | /// Value 235 | pub value: V, 236 | } 237 | 238 | impl KeyValue 239 | where 240 | K: DeserializeOwned, 241 | V: DeserializeOwned, 242 | { 243 | /// Construct a key value pair from known values 244 | pub fn new(field: K, value: V) -> Self { 245 | Self { field, value } 246 | } 247 | } 248 | 249 | struct KVVisitor 250 | where 251 | K: DeserializeOwned, 252 | V: DeserializeOwned, 253 | { 254 | marker: PhantomData KeyValue>, 255 | } 256 | 257 | impl KVVisitor 258 | where 259 | K: DeserializeOwned, 260 | V: DeserializeOwned, 261 | { 262 | fn new() -> Self { 263 | KVVisitor { marker: PhantomData } 264 | } 265 | } 266 | 267 | impl<'de, K, V> Visitor<'de> for KVVisitor 268 | where 269 | K: DeserializeOwned, 270 | V: DeserializeOwned, 271 | { 272 | type Value = KeyValue; 273 | 274 | fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { 275 | formatter.write_str("an object with a single string value of any key name") 276 | } 277 | 278 | fn visit_map(self, mut access: M) -> std::result::Result 279 | where 280 | M: MapAccess<'de>, 281 | { 282 | if let Some((field, value)) = access.next_entry()? 
{ 283 | if access.next_entry::()?.is_some() { 284 | Err(M::Error::custom("too many values")) 285 | } else { 286 | Ok(KeyValue { field, value }) 287 | } 288 | } else { 289 | Err(M::Error::custom("not enough values")) 290 | } 291 | } 292 | } 293 | 294 | impl<'de, K, V> Deserialize<'de> for KeyValue 295 | where 296 | K: DeserializeOwned, 297 | V: DeserializeOwned, 298 | { 299 | fn deserialize(deserializer: D) -> std::result::Result 300 | where 301 | D: Deserializer<'de>, 302 | { 303 | deserializer.deserialize_map(KVVisitor::new()) 304 | } 305 | } 306 | 307 | impl<'de, K, V> Serialize for KeyValue 308 | where 309 | K: Serialize + DeserializeOwned, 310 | V: Serialize + DeserializeOwned, 311 | { 312 | fn serialize(&self, serializer: S) -> std::result::Result 313 | where 314 | S: Serializer, 315 | { 316 | let mut m = serializer.serialize_map(Some(1))?; 317 | m.serialize_entry(&self.field, &self.value)?; 318 | m.end() 319 | } 320 | } 321 | 322 | #[doc(hidden)] 323 | #[derive(Serialize, Deserialize, Debug, Clone)] 324 | pub struct FlatNamedDocument(pub DashMap); 325 | 326 | impl From for FlatNamedDocument { 327 | fn from(nfd: NamedFieldDocument) -> Self { 328 | let map = DashMap::with_capacity(nfd.0.len()); 329 | for (k, v) in nfd.0 { 330 | if v.len() == 1 { 331 | map.insert(k, serde_json::to_value(&v[0]).unwrap()); 332 | continue; 333 | } 334 | map.insert(k, serde_json::to_value(v).unwrap()); 335 | } 336 | FlatNamedDocument(map) 337 | } 338 | } 339 | 340 | #[cfg(test)] 341 | mod tests { 342 | use tantivy::schema::*; 343 | 344 | use super::*; 345 | 346 | #[test] 347 | fn test_doc_deserialize() { 348 | let mut schema_builder = Schema::builder(); 349 | let title = schema_builder.add_text_field("title", TEXT); 350 | let author = schema_builder.add_text_field("text", TEXT); 351 | let likes = schema_builder.add_u64_field("num_u64", FAST); 352 | let schema: Schema = schema_builder.build(); 353 | let doc = tantivy::doc!( 354 | title => "Life Aquatic", 355 | author => "Wes 
Anderson", 356 | likes => 4u64 357 | ); 358 | let named: FlatNamedDocument = schema.to_named_doc(&doc).into(); 359 | 360 | println!("{}", serde_json::to_string_pretty(&named).unwrap()); 361 | } 362 | 363 | #[test] 364 | fn test_kv_serialize() { 365 | let kv = KeyValue::new("test_field".to_string(), 1); 366 | let expected = r#"{"test_field":1}"#; 367 | assert_eq!(expected, serde_json::to_string(&kv).unwrap()); 368 | } 369 | 370 | #[test] 371 | fn test_builder() { 372 | let query_builder = FuzzyQuery::builder().for_field("text").with_distance(20).with_value("Hi!").build(); 373 | let builder = Search::builder().with_limit(50).with_query(query_builder).sort_by("text"); 374 | let query = builder.build(); 375 | 376 | assert!(query.query.is_some()); 377 | assert_eq!(query.limit, 50); 378 | assert!(query.sort_by.is_some()); 379 | assert_eq!(query.sort_by.unwrap(), "text"); 380 | } 381 | } 382 | -------------------------------------------------------------------------------- /toshi-types/src/query/boolean.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use tantivy::query::{BooleanQuery, Occur, Query as TQuery}; 3 | use tantivy::schema::Schema; 4 | 5 | use crate::error::Error; 6 | use crate::query::{CreateQuery, Query}; 7 | use crate::Result; 8 | 9 | /// A boolean query parallel to Tantivy's [`tantivy::query::BooleanQuery`]: BooleanQuery 10 | #[derive(Serialize, Deserialize, Debug, Clone)] 11 | pub struct BoolQuery { 12 | #[serde(default = "Vec::new")] 13 | must: Vec, 14 | #[serde(default = "Vec::new")] 15 | must_not: Vec, 16 | #[serde(default = "Vec::new")] 17 | should: Vec, 18 | #[serde(default)] 19 | minimum_should_match: Option, 20 | #[serde(default)] 21 | boost: Option, 22 | } 23 | 24 | impl BoolQuery { 25 | pub(crate) fn new( 26 | must: Vec, 27 | must_not: Vec, 28 | should: Vec, 29 | minimum_should_match: Option, 30 | boost: Option, 31 | ) -> Self { 32 | Self { 33 | must, 34 | must_not, 
35 | should, 36 | minimum_should_match, 37 | boost, 38 | } 39 | } 40 | 41 | /// Create a builder instance for a BoolQuery 42 | pub fn builder() -> BoolQueryBuilder { 43 | BoolQueryBuilder::default() 44 | } 45 | } 46 | 47 | impl CreateQuery for BoolQuery { 48 | fn create_query(self, schema: &Schema) -> Result> { 49 | let mut all_queries: Vec<(Occur, Box)> = Vec::new(); 50 | if !self.must.is_empty() { 51 | all_queries.append(&mut parse_queries(schema, Occur::Must, self.must)?); 52 | } 53 | if !self.must_not.is_empty() { 54 | all_queries.append(&mut parse_queries(schema, Occur::MustNot, self.must_not)?); 55 | } 56 | if !self.should.is_empty() { 57 | all_queries.append(&mut parse_queries(schema, Occur::Should, self.should)?); 58 | } 59 | Ok(Box::new(BooleanQuery::from(all_queries))) 60 | } 61 | } 62 | 63 | fn parse_queries(schema: &Schema, occur: Occur, queries: Vec) -> Result)>> { 64 | queries 65 | .into_iter() 66 | .map(|q| match q { 67 | Query::Fuzzy(f) => Ok((occur, f.create_query(schema)?)), 68 | Query::Exact(q) => Ok((occur, q.create_query(schema)?)), 69 | Query::Range(r) => Ok((occur, r.create_query(schema)?)), 70 | Query::Phrase(p) => Ok((occur, p.create_query(schema)?)), 71 | Query::Regex(r) => Ok((occur, r.create_query(schema)?)), 72 | _ => Err(Error::QueryError("Invalid type for boolean query".into())), 73 | }) 74 | .collect::)>>>() 75 | } 76 | 77 | #[derive(Debug, Default)] 78 | pub struct BoolQueryBuilder { 79 | must: Vec, 80 | must_not: Vec, 81 | should: Vec, 82 | minimum_should_match: u64, 83 | boost: f64, 84 | } 85 | 86 | impl BoolQueryBuilder { 87 | pub fn new() -> Self { 88 | Self::default() 89 | } 90 | 91 | pub fn must_match(mut self, query: T) -> Self 92 | where 93 | T: Into, 94 | { 95 | self.must.push(query.into()); 96 | self 97 | } 98 | 99 | pub fn must_not_match(mut self, query: T) -> Self 100 | where 101 | T: Into, 102 | { 103 | self.must_not.push(query.into()); 104 | self 105 | } 106 | 107 | pub fn should_match(mut self, query: T) -> Self 108 | 
where 109 | T: Into, 110 | { 111 | self.should.push(query.into()); 112 | self 113 | } 114 | 115 | pub fn with_minimum_should_match(mut self, amount: u64) -> Self { 116 | self.minimum_should_match = amount; 117 | self 118 | } 119 | 120 | pub fn with_boost(mut self, amount: f64) -> Self { 121 | self.boost = amount; 122 | self 123 | } 124 | 125 | pub fn build(self) -> Query { 126 | Query::Boolean { 127 | bool: BoolQuery::new( 128 | self.must, 129 | self.must_not, 130 | self.should, 131 | Some(self.minimum_should_match), 132 | Some(self.boost), 133 | ), 134 | } 135 | } 136 | } 137 | 138 | #[cfg(test)] 139 | mod tests { 140 | use tantivy::schema::*; 141 | 142 | use crate::query::Search; 143 | use crate::{BoolQuery, PhraseQuery, RegexQuery, TermPair}; 144 | 145 | #[test] 146 | fn test_bool_query() { 147 | let test_json = r#" 148 | {"query": { 149 | "bool": { 150 | "must": [ {"term": {"user": "kimchy"}}, {"fuzzy": {"user": {"value": "kimchy", "distance": 214}}}, {"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", 
"distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}} ], 151 | "must_not": [ {"term": {"user": "kimchy"}}, {"range": {"age": {"gt": -10, "lte": 20}}}, {"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": 
{"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}} ], 152 | "should": [ {"term": {"user": "kimchy"}}, {"range": {"age": {"gte": 10, "lte": 20}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": 
{"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}},{"fuzzy": {"user": {"value": "kimchy", "distance": 214}}} ], 153 | "minimum_should_match": 1, 154 | "boost": 1.0 155 | } 156 | }, 157 | "limit": 10 158 | }"#; 159 | let mut builder = SchemaBuilder::new(); 160 | let _text_field = builder.add_text_field("user", STORED | TEXT); 161 | let _u_field = builder.add_i64_field("age", FAST); 162 | let _schema = builder.build(); 163 | 164 | let _result = serde_json::from_str::(test_json).unwrap(); 165 | } 166 | 167 | #[test] 168 | fn test_builder() { 169 | let phrase = PhraseQuery::with_phrase("test_text".into(), TermPair::new(vec!["blah".into()], None)); 170 | let regex 
= RegexQuery::from_str("test_text".into(), ".*"); 171 | BoolQuery::builder() 172 | .must_match(phrase.clone()) 173 | .should_match(regex) 174 | .must_not_match(phrase) 175 | .with_minimum_should_match(1) 176 | .with_boost(1.0) 177 | .build(); 178 | } 179 | } 180 | --------------------------------------------------------------------------------