├── .envrc ├── rustfmt.toml ├── src ├── web │ ├── assets │ │ ├── robots.txt │ │ ├── README │ │ ├── themes │ │ │ ├── README │ │ │ ├── catppuccin-macchiato.css │ │ │ ├── nord-bluish.css │ │ │ ├── discord.css │ │ │ ├── catppuccin-latte.css │ │ │ └── catppuccin-mocha.css │ │ ├── script.js │ │ ├── scripts │ │ │ └── colorpicker.js │ │ └── style.css │ ├── autocomplete.rs │ ├── opensearch.rs │ ├── search │ │ ├── images.rs │ │ └── all.rs │ ├── index.rs │ ├── image_proxy.rs │ ├── settings.rs │ ├── mod.rs │ └── search.rs ├── engines │ ├── search.rs │ ├── postsearch.rs │ ├── answer │ │ ├── ip.rs │ │ ├── notepad.rs │ │ ├── useragent.rs │ │ ├── wikipedia.rs │ │ ├── dictionary.rs │ │ ├── timezone.rs │ │ ├── fend.rs │ │ ├── numbat.rs │ │ └── thesaurus.rs │ ├── answer.rs │ ├── search │ │ ├── rightdao.rs │ │ ├── brave.rs │ │ ├── google_scholar.rs │ │ ├── stract.rs │ │ ├── yep.rs │ │ ├── marginalia.rs │ │ ├── bing.rs │ │ └── google.rs │ ├── postsearch │ │ ├── minecraft_wiki.rs │ │ ├── stackexchange.rs │ │ ├── docs_rs.rs │ │ ├── mdn.rs │ │ └── github.rs │ ├── macros.rs │ └── ranking.rs ├── build.rs ├── main.rs ├── parse.rs ├── urls.rs └── config.rs ├── .prettierrc ├── .gitignore ├── compose.yml ├── Containerfile ├── config-default.toml ├── Cargo.toml ├── flake.lock ├── module.nix ├── flake.nix ├── README └── LICENSE /.envrc: -------------------------------------------------------------------------------- 1 | use flake 2 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | wrap_comments = true -------------------------------------------------------------------------------- /src/web/assets/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /.prettierrc: 
-------------------------------------------------------------------------------- 1 | { 2 | "tabWidth": 2, 3 | "useTabs": false 4 | } 5 | -------------------------------------------------------------------------------- /src/engines/search.rs: -------------------------------------------------------------------------------- 1 | pub mod bing; 2 | pub mod brave; 3 | pub mod google; 4 | pub mod google_scholar; 5 | pub mod marginalia; 6 | pub mod rightdao; 7 | pub mod stract; 8 | pub mod yep; 9 | -------------------------------------------------------------------------------- /src/web/assets/README: -------------------------------------------------------------------------------- 1 | files added in this directory aren't automatically made accessible, you have to add them as a route 2 | in src/web/mod.rs (so the files are included in the binary) 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /config.toml 3 | 4 | # convenience script i use for deploying the site to my server, feel free to 5 | # write your own here too 6 | /deploy.sh 7 | 8 | # direnv (mostly used for Nix) 9 | .direnv/ 10 | -------------------------------------------------------------------------------- /compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.7' 2 | 3 | services: 4 | metasearch2: 5 | build: 6 | args: 7 | CONFIG: "/usr/local/bin/config.toml" 8 | context: . 
9 | dockerfile: Containerfile 10 | network_mode: "host" 11 | restart: unless-stopped -------------------------------------------------------------------------------- /src/web/assets/themes/README: -------------------------------------------------------------------------------- 1 | How to add new themes: 2 | 3 | - Copy a similar theme in this directory and rename it to your theme's name and modify it as necessary 4 | - Add the route at `src/web/mod.rs` 5 | - Add it as an option in settings at `src/web/settings.rs` 6 | -------------------------------------------------------------------------------- /src/engines/postsearch.rs: -------------------------------------------------------------------------------- 1 | //! These search engines are requested after we've built the main search 2 | //! results. They can only show stuff in infoboxes and don't get requested if 3 | //! an infobox was added by another earlier engine. 4 | 5 | pub mod docs_rs; 6 | pub mod github; 7 | pub mod mdn; 8 | pub mod minecraft_wiki; 9 | pub mod stackexchange; 10 | -------------------------------------------------------------------------------- /src/engines/answer/ip.rs: -------------------------------------------------------------------------------- 1 | use maud::html; 2 | 3 | use crate::engines::{EngineResponse, SearchQuery}; 4 | 5 | use super::regex; 6 | 7 | pub async fn request(query: &SearchQuery) -> EngineResponse { 8 | if !regex!("^what('s|s| is) my ip").is_match(&query.query.to_lowercase()) { 9 | return EngineResponse::new(); 10 | } 11 | 12 | let ip = &query.ip; 13 | 14 | EngineResponse::answer_html(html! 
{ 15 | h3 { b { (ip) } } 16 | }) 17 | } 18 | -------------------------------------------------------------------------------- /src/engines/answer.rs: -------------------------------------------------------------------------------- 1 | pub mod colorpicker; 2 | pub mod dictionary; 3 | pub mod fend; 4 | pub mod ip; 5 | pub mod notepad; 6 | pub mod numbat; 7 | pub mod thesaurus; 8 | pub mod timezone; 9 | pub mod useragent; 10 | pub mod wikipedia; 11 | 12 | macro_rules! regex { 13 | ($re:literal $(,)?) => {{ 14 | static RE: std::sync::LazyLock = 15 | std::sync::LazyLock::new(|| regex::Regex::new($re).unwrap()); 16 | &RE 17 | }}; 18 | } 19 | pub(crate) use regex; 20 | -------------------------------------------------------------------------------- /Containerfile: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.91-alpine as chef 2 | WORKDIR /app 3 | 4 | FROM chef AS planner 5 | COPY . . 6 | RUN cargo chef prepare --recipe-path recipe.json 7 | 8 | FROM chef AS builder 9 | COPY --from=planner /app/recipe.json recipe.json 10 | RUN cargo chef cook --release --recipe-path recipe.json 11 | COPY . . 
12 | RUN cargo build --release 13 | 14 | FROM alpine:latest AS runtime 15 | WORKDIR /app 16 | COPY --from=builder /app/config.toml /usr/local/bin/config.toml 17 | COPY --from=builder /app/target/release/metasearch /usr/local/bin/metasearch 18 | ARG CONFIG 19 | ENV CONFIG=${CONFIG} 20 | EXPOSE 28019 21 | ENTRYPOINT /usr/local/bin/metasearch $CONFIG 22 | -------------------------------------------------------------------------------- /src/engines/search/rightdao.rs: -------------------------------------------------------------------------------- 1 | use url::Url; 2 | 3 | use crate::{ 4 | engines::{EngineResponse, RequestResponse, CLIENT}, 5 | parse::{parse_html_response_with_opts, ParseOpts}, 6 | }; 7 | 8 | pub async fn request(query: &str) -> RequestResponse { 9 | CLIENT 10 | .get(Url::parse_with_params("https://rightdao.com/search", &[("q", query)]).unwrap()) 11 | .into() 12 | } 13 | 14 | pub fn parse_response(body: &str) -> eyre::Result { 15 | parse_html_response_with_opts( 16 | body, 17 | ParseOpts::new() 18 | .result("div.item") 19 | .title("div.title") 20 | .href("a[href]") 21 | .description("div.description"), 22 | ) 23 | } 24 | -------------------------------------------------------------------------------- /src/engines/answer/notepad.rs: -------------------------------------------------------------------------------- 1 | use maud::html; 2 | 3 | use crate::engines::{EngineResponse, SearchQuery}; 4 | 5 | use super::regex; 6 | 7 | pub async fn request(query: &SearchQuery) -> EngineResponse { 8 | if !regex!("^(note|text|code) ?(pad|book|edit(or|er)?)$").is_match(&query.query.to_lowercase()) 9 | { 10 | return EngineResponse::new(); 11 | } 12 | 13 | // This allows pasting styles which is undesired behavior, and the 14 | // `contenteditable="plaintext-only"` attribute currently only works on Chrome. 15 | // This should be updated when the attribute becomes available in more browsers 16 | EngineResponse::answer_html(html! 
{ 17 | div.answer-notepad contenteditable {} 18 | }) 19 | } 20 | -------------------------------------------------------------------------------- /src/engines/search/brave.rs: -------------------------------------------------------------------------------- 1 | use url::Url; 2 | 3 | use crate::{ 4 | engines::{EngineResponse, RequestResponse, CLIENT}, 5 | parse::{parse_html_response_with_opts, ParseOpts}, 6 | }; 7 | 8 | pub async fn request(query: &str) -> RequestResponse { 9 | CLIENT 10 | .get(Url::parse_with_params("https://search.brave.com/search", &[("q", query)]).unwrap()) 11 | .into() 12 | } 13 | 14 | pub fn parse_response(body: &str) -> eyre::Result { 15 | parse_html_response_with_opts( 16 | body, 17 | ParseOpts::new() 18 | .result("#results > .snippet[data-pos]:not(.standalone)") 19 | .title(".title") 20 | .href("a") 21 | .description(".generic-snippet, .video-snippet > .snippet-description"), 22 | ) 23 | } 24 | -------------------------------------------------------------------------------- /src/web/assets/themes/catppuccin-macchiato.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --bg-1: #24273a; /* background */ 3 | --bg-2: #1e2030; /* mantle */ 4 | --bg-3: #181926; /* crust */ 5 | --bg-4: #363a4e; /* surface 0 */ 6 | --fg-1: #cad3f5; /* text */ 7 | --fg-2: #b8c0e0; /* subtext 1 */ 8 | --fg-2: #a5adcb; /* subtext 0 */ 9 | --accent: #c6a0f6; /* mauve */ 10 | --positive: #a6da95; /* green */ 11 | --negative: #ed8796; /* red */ 12 | 13 | --syntax-string: #a6da95; /* green (string) */ 14 | --syntax-special: #c69ff5; /* mauve (keyword) */ 15 | --syntax-constant: #f5a97f; /* peach (constant/number) */ 16 | --syntax-comment: #929ab7; /* overlay 2 (comment) */ 17 | --syntax-func: #8aadf3; /* blue (method/function) */ 18 | 19 | --link: var(--accent); 20 | --link-visited: var(--link); 21 | } 22 | -------------------------------------------------------------------------------- /src/build.rs: 
-------------------------------------------------------------------------------- 1 | use std::process::Command; 2 | 3 | fn main() { 4 | println!("cargo:rustc-rerun-if-changed=.git/HEAD"); 5 | let output = Command::new("git").args(["rev-parse", "HEAD"]).output(); 6 | let git_hash = match output { 7 | Ok(output) => String::from_utf8(output.stdout).unwrap_or("unknown".into()), 8 | Err(_) => "unknown".into(), 9 | }; 10 | let output = Command::new("git") 11 | .args(["rev-parse", "--short", "HEAD"]) 12 | .output(); 13 | let git_hash_short = match output { 14 | Ok(output) => String::from_utf8(output.stdout).unwrap_or("unknown".into()), 15 | Err(_) => "unknown".into(), 16 | }; 17 | println!("cargo:rustc-env=GIT_HASH={git_hash}"); 18 | println!("cargo:rustc-env=GIT_HASH_SHORT={git_hash_short}"); 19 | } 20 | -------------------------------------------------------------------------------- /src/web/assets/themes/nord-bluish.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /* body background */ 3 | --bg-1: #2e3440; 4 | /* background of the content */ 5 | --bg-2: #3b4252; 6 | /* input suggestions background */ 7 | --bg-3: #434c5e; 8 | /* mostly borders */ 9 | --bg-4: #5e81ac; 10 | 11 | /* main text color */ 12 | --fg-1: #eceff4; 13 | /* search result description */ 14 | --fg-2: #e5e9f0; 15 | --fg-3: #d8dee9; 16 | 17 | /* focus outline */ 18 | --accent: #88c0d0; 19 | 20 | --link: #88c0d0; 21 | --link-visited: #5e81ac; 22 | 23 | /* green */ 24 | --positive: #a3be8c; 25 | /* red, error */ 26 | --negative: #bf616a; 27 | 28 | --syntax-string: #a3be8c; 29 | --syntax-special: #81a1c1; 30 | --syntax-constant: #d8dee9; 31 | --syntax-comment: #616e88; 32 | --syntax-func: #88c0d0; 33 | } 34 | -------------------------------------------------------------------------------- /src/web/assets/themes/discord.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /* body background */ 3 | --bg-1: 
#313338; 4 | /* background of the content */ 5 | --bg-2: #2b2d31; 6 | /* input suggestions background */ 7 | --bg-3: #1e1f22; 8 | /* mostly borders */ 9 | --bg-4: transparent; 10 | 11 | /* main text color */ 12 | --fg-1: #dbdee1; 13 | /* search result description */ 14 | --fg-2: #b5bac1; 15 | --fg-3: #949ba4; 16 | 17 | /* focus outline */ 18 | --accent: #41434a; 19 | 20 | --link: #00a8fc; 21 | --link-visited: #2eb6ff; 22 | 23 | /* green, success */ 24 | --positive: #23a559; 25 | /* red, error */ 26 | --negative: #fa777c; 27 | 28 | --syntax-string: #aad94c; 29 | --syntax-special: #e6b673; 30 | --syntax-constant: #d2a6ff; 31 | --syntax-comment: #acb6bf8c; 32 | --syntax-func: #ffb454; 33 | } 34 | -------------------------------------------------------------------------------- /src/web/assets/themes/catppuccin-latte.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /* body background */ 3 | --bg-1: #dce0e8; 4 | /* background of the content */ 5 | --bg-2: #e6e9ef; 6 | /* input suggestions background */ 7 | --bg-3: #eff1f5; 8 | /* mostly borders */ 9 | --bg-4: #9ca0b0; 10 | 11 | /* main text color */ 12 | --fg-1: #4c4f69; 13 | /* search result description */ 14 | --fg-2: #4c4f69; 15 | --fg-3: #5c5f77; 16 | 17 | /* focus outline */ 18 | --accent: #7287fd; 19 | 20 | --link: #1e66f5; 21 | --link-visited: #8839ef; 22 | 23 | /* green, success */ 24 | --positive: #40a02b; 25 | /* red, error */ 26 | --negative: #e64553; 27 | 28 | --syntax-string: #40a02b; 29 | --syntax-special: #dc8a78; 30 | --syntax-constant: #ea76cb; 31 | --syntax-comment: #5c5f77; 32 | --syntax-func: #7287fd; 33 | } 34 | -------------------------------------------------------------------------------- /src/web/assets/themes/catppuccin-mocha.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /* body background */ 3 | --bg-1: #11111b; 4 | /* background of the content */ 5 | --bg-2: #181825; 6 | /* input 
suggestions background */ 7 | --bg-3: #1e1e2e; 8 | /* mostly borders */ 9 | --bg-4: #313244; 10 | 11 | /* main text color */ 12 | --fg-1: #cdd6f4; 13 | /* search result description */ 14 | --fg-2: #bac2de; 15 | --fg-3: #a6adc8; 16 | 17 | /* focus outline */ 18 | --accent: #b4befe; 19 | 20 | --link: #89b4fa; 21 | --link-visited: #bc78f8; 22 | 23 | /* green, success */ 24 | --positive: #7fd962; 25 | /* red, error */ 26 | --negative: #f38ba8; 27 | 28 | --syntax-string: #a6e3a1; 29 | --syntax-special: #fab387; 30 | --syntax-constant: #cba6f7; 31 | --syntax-comment: #6c7086; 32 | --syntax-func: #89b4fa; 33 | } 34 | -------------------------------------------------------------------------------- /src/web/autocomplete.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use axum::{extract::Query, http::StatusCode, response::IntoResponse, Extension, Json}; 4 | use tracing::error; 5 | 6 | use crate::{config::Config, engines}; 7 | 8 | pub async fn route( 9 | Query(params): Query>, 10 | Extension(config): Extension, 11 | ) -> impl IntoResponse { 12 | let query = params 13 | .get("q") 14 | .cloned() 15 | .unwrap_or_default() 16 | .replace('\n', " "); 17 | 18 | let res = match engines::autocomplete(&config, &query).await { 19 | Ok(res) => res, 20 | Err(err) => { 21 | error!("Autocomplete error for {query}: {err}"); 22 | return (StatusCode::INTERNAL_SERVER_ERROR, Json((query, vec![]))); 23 | } 24 | }; 25 | 26 | (StatusCode::OK, Json((query, res))) 27 | } 28 | -------------------------------------------------------------------------------- /src/engines/search/google_scholar.rs: -------------------------------------------------------------------------------- 1 | use url::Url; 2 | 3 | use crate::{ 4 | engines::{EngineResponse, RequestResponse, CLIENT}, 5 | parse::{parse_html_response_with_opts, ParseOpts}, 6 | }; 7 | 8 | pub async fn request(query: &str) -> RequestResponse { 9 | CLIENT 10 | .get( 11 | 
Url::parse_with_params( 12 | "https://scholar.google.com/scholar", 13 | &[("hl", "en"), ("as_sdt", "0,5"), ("q", query), ("btnG", "")], 14 | ) 15 | .unwrap(), 16 | ) 17 | .into() 18 | } 19 | 20 | pub fn parse_response(body: &str) -> eyre::Result { 21 | parse_html_response_with_opts( 22 | body, 23 | ParseOpts::new() 24 | .result("div.gs_r") 25 | .title("h3") 26 | .href("h3 > a[href]") 27 | .description("div.gs_rs"), 28 | ) 29 | } 30 | -------------------------------------------------------------------------------- /config-default.toml: -------------------------------------------------------------------------------- 1 | # See src/config.rs for all of the possible options. 2 | # The commented-out lines are examples of values you could set, not the defaults. 3 | 4 | bind = "0.0.0.0:28019" 5 | api = false 6 | 7 | [ui] 8 | # engine_list_separator = true 9 | # show_version_info = true 10 | # stylesheet_url = "/themes/catppuccin-mocha.css" 11 | # favicon_url = "data:image/svg+xml;base64,PHN2ZyB2aWV3Qm94PSIwIDAgMzIgMzIiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PGNpcmNsZSBjeD0iMTYiIGN5PSIxNiIgcj0iMTEiLz48L3N2Zz4=" 12 | 13 | [image_search] 14 | # enabled = true 15 | 16 | [engines] 17 | # numbat = false 18 | # fend = true 19 | 20 | [urls.replace] 21 | # "www.reddit.com" = "old.reddit.com" 22 | # "medium.com" = "scribe.rip" 23 | # ".medium.com" = "scribe.rip" 24 | 25 | [urls.weight] 26 | # These are checked after applying replacements. Setting the weight to 0 (or any 27 | # negative number) completely hides the result. Longer matches have a higher 28 | # priority. 
29 | # "quora.com" = 0.1 30 | # ".quora.com" = 0.1 31 | -------------------------------------------------------------------------------- /src/web/opensearch.rs: -------------------------------------------------------------------------------- 1 | use axum::{ 2 | http::{header, HeaderMap}, 3 | response::IntoResponse, 4 | }; 5 | use maud::{html, PreEscaped}; 6 | 7 | pub async fn route(headers: HeaderMap) -> impl IntoResponse { 8 | let host = headers 9 | .get("host") 10 | .and_then(|host| host.to_str().ok()) 11 | .unwrap_or("localhost"); 12 | 13 | ( 14 | [( 15 | header::CONTENT_TYPE, 16 | "application/opensearchdescription+xml", 17 | )], 18 | html! { 19 | (PreEscaped(r#""#)) 20 | OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" { 21 | ShortName { "metasearch" } 22 | Description { "Search metasearch" } 23 | InputEncoding { "UTF-8" } 24 | Url type="text/html" method="get" template=(format!("https://{host}/search?q={{searchTerms}}")) {} 25 | Url type="application/x-suggestions+json" method="get" template=(format!("https://{host}/autocomplete?q={{searchTerms}}")) {} 26 | } 27 | }.into_string(), 28 | ) 29 | } 30 | -------------------------------------------------------------------------------- /src/engines/answer/useragent.rs: -------------------------------------------------------------------------------- 1 | use maud::html; 2 | 3 | use crate::engines::{EngineResponse, SearchQuery}; 4 | 5 | use super::regex; 6 | 7 | pub async fn request(query: &SearchQuery) -> EngineResponse { 8 | if !regex!("^(what('s|s| is) my (user ?agent|ua)|ua|user ?agent)$") 9 | .is_match(&query.query.to_lowercase()) 10 | { 11 | return EngineResponse::new(); 12 | } 13 | 14 | let user_agent = query.request_headers.get("user-agent"); 15 | 16 | let all_headers_html = html! 
{ 17 | br; 18 | details { 19 | summary { "All headers" } 20 | @for (header, value) in query.request_headers.iter() { 21 | div { 22 | b { (header) } ": " (value) 23 | } 24 | } 25 | } 26 | }; 27 | 28 | EngineResponse::answer_html(if let Some(user_agent) = user_agent { 29 | html! { 30 | h3 { b { (user_agent) } } 31 | (all_headers_html) 32 | } 33 | } else { 34 | html! { 35 | "You don't have a user agent" 36 | (all_headers_html) 37 | } 38 | }) 39 | } 40 | -------------------------------------------------------------------------------- /src/engines/search/stract.rs: -------------------------------------------------------------------------------- 1 | use url::Url; 2 | 3 | use crate::{ 4 | engines::{EngineResponse, RequestResponse, CLIENT}, 5 | parse::{parse_html_response_with_opts, ParseOpts}, 6 | }; 7 | 8 | pub async fn request(query: &str) -> RequestResponse { 9 | CLIENT 10 | .get( 11 | Url::parse_with_params( 12 | "https://stract.com/search", 13 | &[ 14 | ("ss", "false"), 15 | // this is not a tracking parameter or token 16 | // this is stract's default value for the search rankings parameter 17 | ("sr", "N4IgNglg1gpgJiAXAbQLoBoRwgZ0rBFDEAIzAHsBjApNAXyA"), 18 | ("q", query), 19 | ("optic", ""), 20 | ], 21 | ) 22 | .unwrap(), 23 | ) 24 | .into() 25 | } 26 | 27 | pub fn parse_response(body: &str) -> eyre::Result { 28 | parse_html_response_with_opts( 29 | body, 30 | ParseOpts::new() 31 | .result("div.grid.w-full.grid-cols-1.space-y-10.place-self-start > div > div.flex.min-w-0.grow.flex-col") 32 | .title("a[title]") 33 | .href("a[href]") 34 | .description("#snippet-text"), 35 | ) 36 | } 37 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "metasearch" 3 | version = "0.2.4" 4 | edition = "2021" 5 | build = "src/build.rs" 6 | description = "a cute metasearch engine" 7 | license = "CC0-1.0" 8 | repository = 
"https://github.com/mat-1/metasearch2" 9 | 10 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 11 | 12 | [dependencies] 13 | ammonia = "4.1.2" 14 | async-stream = "0.3.6" 15 | axum = { version = "0.8.7", default-features = false, features = [ 16 | "tokio", 17 | "http1", 18 | "http2", 19 | "query", 20 | "json", 21 | "form", 22 | ] } 23 | axum-extra = { version = "0.12.2", features = ["cookie"] } 24 | base64 = "0.22.1" 25 | bytes = "1.11.0" 26 | chrono = "0.4.42" 27 | chrono-tz = { version = "0.10.4", features = ["case-insensitive"] } 28 | eyre = "0.6.12" 29 | fend-core = "1.5.7" 30 | futures = "0.3.31" 31 | html-escape = "0.2.13" 32 | maud = "0.27.0" 33 | numbat = "1.16.0" 34 | parking_lot = "0.12.5" 35 | rand = "0.9.2" 36 | regex = "1.12.2" 37 | scraper = "0.24.0" 38 | serde = { version = "1.0.228", features = ["derive"] } 39 | # preserve_order is needed for google images. yippee! 40 | serde_json = { version = "1.0.145", features = ["preserve_order"] } 41 | tokio = { version = "1.48.0", features = ["rt", "macros"] } 42 | tokio-stream = "0.1.17" 43 | toml = { version = "0.9.8", default-features = false, features = [ 44 | "std", 45 | "parse", 46 | "serde", 47 | ] } 48 | tower = "0.5.2" 49 | tower-http = "0.6.6" 50 | tracing = "0.1.41" 51 | tracing-subscriber = "0.3.20" 52 | url = "2.5.7" 53 | urlencoding = "2.1.3" 54 | wreq = { version = "5.3.0", features = [ 55 | "brotli", 56 | "cookies", 57 | "deflate", 58 | "gzip", 59 | "zstd", 60 | ] } 61 | wreq-util = "2.2.6" 62 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "crane": { 4 | "locked": { 5 | "lastModified": 1729273024, 6 | "narHash": "sha256-Mb5SemVsootkn4Q2IiY0rr9vrXdCCpQ9HnZeD/J3uXs=", 7 | "owner": "ipetkov", 8 | "repo": "crane", 9 | "rev": "fa8b7445ddadc37850ed222718ca86622be01967", 10 | "type": "github" 11 | 
}, 12 | "original": { 13 | "owner": "ipetkov", 14 | "repo": "crane", 15 | "type": "github" 16 | } 17 | }, 18 | "flake-parts": { 19 | "inputs": { 20 | "nixpkgs-lib": [ 21 | "nixpkgs" 22 | ] 23 | }, 24 | "locked": { 25 | "lastModified": 1733312601, 26 | "narHash": "sha256-4pDvzqnegAfRkPwO3wmwBhVi/Sye1mzps0zHWYnP88c=", 27 | "owner": "hercules-ci", 28 | "repo": "flake-parts", 29 | "rev": "205b12d8b7cd4802fbcb8e8ef6a0f1408781a4f9", 30 | "type": "github" 31 | }, 32 | "original": { 33 | "owner": "hercules-ci", 34 | "repo": "flake-parts", 35 | "type": "github" 36 | } 37 | }, 38 | "nixpkgs": { 39 | "locked": { 40 | "lastModified": 1729265718, 41 | "narHash": "sha256-4HQI+6LsO3kpWTYuVGIzhJs1cetFcwT7quWCk/6rqeo=", 42 | "owner": "NixOS", 43 | "repo": "nixpkgs", 44 | "rev": "ccc0c2126893dd20963580b6478d1a10a4512185", 45 | "type": "github" 46 | }, 47 | "original": { 48 | "owner": "NixOS", 49 | "ref": "nixpkgs-unstable", 50 | "repo": "nixpkgs", 51 | "type": "github" 52 | } 53 | }, 54 | "root": { 55 | "inputs": { 56 | "crane": "crane", 57 | "flake-parts": "flake-parts", 58 | "nixpkgs": "nixpkgs" 59 | } 60 | } 61 | }, 62 | "root": "root", 63 | "version": 7 64 | } 65 | -------------------------------------------------------------------------------- /src/engines/postsearch/minecraft_wiki.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | use scraper::{Html, Selector}; 3 | 4 | use crate::engines::{HttpResponse, Response, CLIENT}; 5 | 6 | pub async fn request(response: &Response) -> Option { 7 | for search_result in response.search_results.iter().take(8) { 8 | if search_result 9 | .result 10 | .url 11 | .starts_with("https://minecraft.wiki/w/") 12 | { 13 | return Some(CLIENT.get(search_result.result.url.as_str())); 14 | } 15 | } 16 | 17 | None 18 | } 19 | 20 | pub fn parse_response(HttpResponse { res, body, .. 
}: &HttpResponse) -> Option> { 21 | let url = res.url().clone(); 22 | 23 | let dom = Html::parse_document(body); 24 | 25 | let page_title = dom 26 | .select(&Selector::parse("#firstHeading").unwrap()) 27 | .next()? 28 | .text() 29 | .collect::() 30 | .trim() 31 | .to_string(); 32 | 33 | let doc_query = Selector::parse(".mw-parser-output > p").unwrap(); 34 | 35 | let doc_html = dom 36 | .select(&doc_query) 37 | .next() 38 | .map(|doc| doc.html()) 39 | .unwrap_or_default(); 40 | 41 | let doc_html = ammonia::Builder::default() 42 | .link_rel(None) 43 | .add_allowed_classes("div", ["notaninfobox", "mcw-mainpage-icon"]) 44 | .add_allowed_classes("pre", ["noexcerpt", "navigation-not-searchable"]) 45 | .url_relative(ammonia::UrlRelative::RewriteWithBase(url.clone())) 46 | .clean(&doc_html) 47 | .to_string(); 48 | 49 | Some(html! { 50 | h2 { 51 | a href=(url) { (page_title) } 52 | } 53 | div.infobox-minecraft_wiki-article { 54 | (PreEscaped(doc_html)) 55 | } 56 | }) 57 | } 58 | -------------------------------------------------------------------------------- /src/web/search/images.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | 3 | use crate::{ 4 | config::Config, 5 | engines::{self, EngineImageResult, ImagesResponse}, 6 | web::search::render_engine_list, 7 | }; 8 | 9 | pub fn render_results(response: ImagesResponse) -> PreEscaped { 10 | html! 
{ 11 | div.image-results { 12 | @for image in &response.image_results { 13 | (render_image_result(image, &response.config)) 14 | } 15 | } 16 | } 17 | } 18 | 19 | fn render_image_result( 20 | result: &engines::SearchResult, 21 | config: &Config, 22 | ) -> PreEscaped { 23 | let original_image_src = &result.result.image_url; 24 | let image_src = if config.image_search.proxy.enabled { 25 | // serialize url params 26 | let escaped_param = 27 | url::form_urlencoded::byte_serialize(original_image_src.as_bytes()).collect::(); 28 | format!("/image-proxy?url={escaped_param}") 29 | } else { 30 | original_image_src.to_string() 31 | }; 32 | html! { 33 | div.image-result { 34 | a.image-result-anchor rel="noreferrer" href=(original_image_src) target="_blank" { 35 | div.image-result-img-container { 36 | img loading="lazy" src=(image_src) width=(result.result.width) height=(result.result.height); 37 | } 38 | } 39 | a.image-result-page-anchor href=(result.result.page_url) { 40 | span.image-result-page-url.search-result-url { (result.result.page_url) } 41 | span.image-result-title { (result.result.title) } 42 | } 43 | @if config.image_search.show_engines { 44 | {(render_engine_list(&result.engines.iter().copied().collect::>(), config))} 45 | } 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/engines/search/yep.rs: -------------------------------------------------------------------------------- 1 | use serde::Deserialize; 2 | use url::Url; 3 | 4 | use crate::engines::{EngineResponse, EngineSearchResult, RequestResponse, CLIENT}; 5 | 6 | pub async fn request(query: &str) -> RequestResponse { 7 | CLIENT 8 | .get( 9 | Url::parse_with_params( 10 | "https://api.yep.com/fs/2/search", 11 | &[ 12 | ("client", "web"), 13 | ("gl", "all"), 14 | ("no_correct", "true"), 15 | ("q", query), 16 | ("safeSearch", "off"), 17 | ("type", "web"), 18 | ], 19 | ) 20 | .unwrap(), 21 | ) 22 | .into() 23 | } 24 | 25 | #[derive(Deserialize, Debug)] 
26 | struct YepApiResponse { 27 | pub results: Vec, 28 | } 29 | 30 | #[derive(Deserialize, Debug)] 31 | struct YepApiResponseResult { 32 | pub url: String, 33 | pub title: String, 34 | pub snippet: String, 35 | } 36 | 37 | pub fn parse_response(body: &str) -> eyre::Result { 38 | let (code, response): (String, YepApiResponse) = serde_json::from_str(body)?; 39 | if &code != "Ok" { 40 | return Ok(EngineResponse::new()); 41 | } 42 | 43 | let search_results = response 44 | .results 45 | .into_iter() 46 | .map(|result| { 47 | let description_html = scraper::Html::parse_document(&result.snippet); 48 | let description = description_html.root_element().text().collect(); 49 | EngineSearchResult { 50 | url: result.url, 51 | title: result.title, 52 | description, 53 | } 54 | }) 55 | .collect(); 56 | 57 | Ok(EngineResponse { 58 | search_results, 59 | featured_snippet: None, 60 | answer_html: None, 61 | infobox_html: None, 62 | }) 63 | } 64 | -------------------------------------------------------------------------------- /src/engines/search/marginalia.rs: -------------------------------------------------------------------------------- 1 | use serde::Deserialize; 2 | use tracing::error; 3 | use url::Url; 4 | 5 | use crate::{ 6 | engines::{Engine, EngineResponse, RequestResponse, SearchQuery, CLIENT}, 7 | parse::{parse_html_response_with_opts, ParseOpts}, 8 | }; 9 | 10 | #[derive(Deserialize)] 11 | pub struct MarginaliaConfig { 12 | pub args: MarginaliaArgs, 13 | } 14 | #[derive(Deserialize)] 15 | pub struct MarginaliaArgs { 16 | pub profile: String, 17 | pub js: String, 18 | pub adtech: String, 19 | } 20 | 21 | pub async fn request(query: &SearchQuery) -> RequestResponse { 22 | // if the query is more than 3 words or has any special characters then abort 23 | if query.split_whitespace().count() > 3 24 | || !query.chars().all(|c| c.is_ascii_alphanumeric() || c == ' ') 25 | { 26 | return RequestResponse::None; 27 | } 28 | 29 | let config_toml = 
query.config.engines.get(Engine::Marginalia).extra.clone(); 30 | let config: MarginaliaConfig = match toml::Value::Table(config_toml).try_into() { 31 | Ok(args) => args, 32 | Err(err) => { 33 | error!("Failed to parse Marginalia config: {err}"); 34 | return RequestResponse::None; 35 | } 36 | }; 37 | 38 | CLIENT 39 | .get( 40 | Url::parse_with_params( 41 | "https://old-search.marginalia.nu/search", 42 | &[ 43 | ("query", query.query.as_str()), 44 | ("profile", config.args.profile.as_str()), 45 | ("js", config.args.js.as_str()), 46 | ("adtech", config.args.adtech.as_str()), 47 | ], 48 | ) 49 | .unwrap(), 50 | ) 51 | .into() 52 | } 53 | 54 | pub fn parse_response(body: &str) -> eyre::Result { 55 | parse_html_response_with_opts( 56 | body, 57 | ParseOpts::new() 58 | .result("section.search-result") 59 | .title("h2") 60 | .href("a[href]") 61 | .description("p.description"), 62 | ) 63 | } 64 | -------------------------------------------------------------------------------- /module.nix: -------------------------------------------------------------------------------- 1 | self: { 2 | config, 3 | pkgs, 4 | lib, 5 | ... 6 | }: let 7 | cfg = config.services.metasearch; 8 | port = 9 | if lib.hasAttr "bind" cfg.settings 10 | then lib.toInt (builtins.elemAt (lib.splitString ":" cfg.settings.bind) 1) 11 | else 28019; 12 | 13 | settingArg = 14 | if cfg.settings != {} 15 | then " " + pkgs.writers.writeTOML "metasearch.toml" cfg.settings 16 | else ""; 17 | 18 | loggingArg = 19 | if !cfg.enableLogging 20 | then " > /dev/null" 21 | else ""; 22 | in { 23 | options.services.metasearch = { 24 | enable = lib.mkEnableOption "metasearch"; 25 | openFirewall = lib.mkOption { 26 | type = lib.types.bool; 27 | default = false; 28 | description = '' 29 | Open firewall ports used by metasearch. 30 | ''; 31 | }; 32 | enableLogging = lib.mkOption { 33 | type = lib.types.bool; 34 | default = false; 35 | description = '' 36 | Enable metasearch logging. Does not affect stderr. 
37 | ''; 38 | }; 39 | settings = lib.mkOption { 40 | type = lib.types.attrs; 41 | default = {}; 42 | description = '' 43 | Optional metasearch configuration. If not defined, defaults in `src/config.rs` will be used 44 | ''; 45 | example = { 46 | bind = "0.0.0.0:4444"; 47 | ui.show_version_info = true; 48 | urls = { 49 | replace = { 50 | "www.reddit.com" = "old.reddit.com"; 51 | }; 52 | 53 | weight = { 54 | "quora.com" = 0.1; 55 | }; 56 | }; 57 | }; 58 | }; 59 | }; 60 | 61 | config = lib.mkIf cfg.enable { 62 | systemd.services.metasearch = { 63 | wantedBy = ["multi-user.target"]; 64 | after = ["network.target"]; 65 | description = "a cute metasearch engine"; 66 | serviceConfig = { 67 | ExecStart = "${self.packages.${pkgs.system}.default}/bin/metasearch" + settingArg + loggingArg; 68 | }; 69 | }; 70 | 71 | networking.firewall = lib.mkIf cfg.openFirewall { 72 | allowedTCPPorts = [port]; 73 | }; 74 | }; 75 | } 76 | -------------------------------------------------------------------------------- /src/web/index.rs: -------------------------------------------------------------------------------- 1 | use axum::{http::header, response::IntoResponse, Extension}; 2 | use maud::{html, PreEscaped, DOCTYPE}; 3 | 4 | use crate::{config::Config, web::head_html}; 5 | 6 | const BASE_COMMIT_URL: &str = "https://github.com/mat-1/metasearch2/commit/"; 7 | const VERSION: &str = std::env!("CARGO_PKG_VERSION"); 8 | const COMMIT_HASH: &str = std::env!("GIT_HASH"); 9 | const COMMIT_HASH_SHORT: &str = std::env!("GIT_HASH_SHORT"); 10 | 11 | pub async fn get(Extension(config): Extension) -> impl IntoResponse { 12 | let html = html! 
{ 13 | (PreEscaped("\n")) 14 | (DOCTYPE) 15 | html lang="en" { 16 | {(head_html(None, &config))} 17 | body { 18 | @if config.ui.show_settings_link { 19 | a.settings-link href="/settings" { "Settings" } 20 | } 21 | div.main-container.index-page { 22 | h1 { {(config.ui.site_name)} } 23 | form.search-form action="/search" method="get" { 24 | input type="text" name="q" placeholder="Search" id="search-input" autofocus onfocus="this.select()" autocomplete="off"; 25 | input type="submit" value="Search"; 26 | } 27 | } 28 | @if config.ui.show_version_info { 29 | span.version-info { 30 | @if COMMIT_HASH == "unknown" || COMMIT_HASH_SHORT == "unknown" { 31 | "Version " 32 | (VERSION) 33 | } @else { 34 | "Version " 35 | (VERSION) 36 | " (" 37 | a href=(format!("{BASE_COMMIT_URL}{COMMIT_HASH}")) { (COMMIT_HASH_SHORT) } 38 | ")" 39 | } 40 | } 41 | } 42 | } 43 | } 44 | } 45 | .into_string(); 46 | 47 | ([(header::CONTENT_TYPE, "text/html; charset=utf-8")], html) 48 | } 49 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | env, 3 | path::{Path, PathBuf}, 4 | }; 5 | 6 | use config::Config; 7 | use tracing::error; 8 | 9 | pub mod config; 10 | pub mod engines; 11 | pub mod parse; 12 | pub mod urls; 13 | pub mod web; 14 | 15 | #[tokio::main(flavor = "current_thread")] 16 | async fn main() { 17 | tracing_subscriber::fmt::init(); 18 | 19 | if env::args().any(|arg| arg == "--help" || arg == "-h" || arg == "help" || arg == "h") { 20 | println!("Usage: metasearch [config_path]"); 21 | return; 22 | } 23 | 24 | let config_path = config_path(); 25 | let config = match Config::read_or_create(&config_path) { 26 | Ok(config) => config, 27 | Err(err) => { 28 | error!("Couldn't parse config:\n{err}"); 29 | return; 30 | } 31 | }; 32 | web::run(config).await; 33 | } 34 | 35 | fn config_path() -> PathBuf { 36 | if let Some(config_path) = env::args().nth(1) { 
37 | return PathBuf::from(config_path); 38 | } 39 | 40 | let app_name = env!("CARGO_PKG_NAME"); 41 | 42 | let mut default_config_dir = None; 43 | 44 | // $XDG_CONFIG_HOME/metasearch/config.toml 45 | if let Ok(xdg_config_home) = env::var("XDG_CONFIG_HOME") { 46 | let path = PathBuf::from(xdg_config_home) 47 | .join(app_name) 48 | .join("config.toml"); 49 | if path.is_file() { 50 | return path; 51 | } 52 | if default_config_dir.is_none() { 53 | default_config_dir = Some(path); 54 | } 55 | } 56 | 57 | // $HOME/.config/metasearch/config.toml 58 | if let Ok(home) = env::var("HOME") { 59 | let path = PathBuf::from(home) 60 | .join(".config") 61 | .join(app_name) 62 | .join("config.toml"); 63 | if path.is_file() { 64 | return path; 65 | } 66 | if default_config_dir.is_none() { 67 | default_config_dir = Some(path); 68 | } 69 | } 70 | 71 | // ./config.toml 72 | let path = Path::new("config.toml"); 73 | if path.exists() { 74 | return path.to_path_buf(); 75 | } 76 | default_config_dir.unwrap_or(PathBuf::from("config.toml")) 77 | } 78 | -------------------------------------------------------------------------------- /src/engines/postsearch/stackexchange.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | use scraper::{Html, Selector}; 3 | use url::Url; 4 | 5 | use crate::engines::{answer::regex, Response, CLIENT}; 6 | 7 | pub async fn request(response: &Response) -> Option { 8 | for search_result in response.search_results.iter().take(8) { 9 | if regex!(r"^https:\/\/(stackoverflow\.com|serverfault\.com|superuser\.com|\w{1,}\.stackexchange\.com)\/questions\/\d+") 10 | .is_match(&search_result.result.url) 11 | { 12 | return Some(CLIENT.get(search_result.result.url.as_str())); 13 | } 14 | } 15 | 16 | None 17 | } 18 | 19 | pub fn parse_response(body: &str) -> Option> { 20 | let dom = Html::parse_document(body); 21 | 22 | let title = dom 23 | .select(&Selector::parse("h1").unwrap()) 24 | .next()? 
25 | .text() 26 | .collect::(); 27 | 28 | let base_url = dom 29 | .select(&Selector::parse("link[rel=canonical]").unwrap()) 30 | .next()? 31 | .value() 32 | .attr("href")?; 33 | let url = Url::join( 34 | &Url::parse(base_url).unwrap(), 35 | dom.select(&Selector::parse(".question-hyperlink").unwrap()) 36 | .next()? 37 | .value() 38 | .attr("href")?, 39 | ) 40 | .ok()?; 41 | 42 | let answer_query = Selector::parse("div.answer.accepted-answer").unwrap(); 43 | 44 | let answer = dom.select(&answer_query).next()?; 45 | let answer_id = answer.value().attr("data-answerid")?; 46 | let answer_html = answer 47 | .select(&Selector::parse("div.answercell > div.js-post-body").unwrap()) 48 | .next()? 49 | .html() 50 | .to_string(); 51 | 52 | let answer_html = ammonia::Builder::default() 53 | .url_relative(ammonia::UrlRelative::RewriteWithBase(url.clone())) 54 | .clean(&answer_html) 55 | .to_string(); 56 | 57 | let url = format!("{url}#{answer_id}"); 58 | 59 | Some(html! { 60 | a href=(url) { 61 | h2 { (title) } 62 | } 63 | div.infobox-stackexchange-answer { 64 | (PreEscaped(answer_html)) 65 | } 66 | }) 67 | } 68 | -------------------------------------------------------------------------------- /src/engines/postsearch/docs_rs.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | use scraper::{Html, Selector}; 3 | 4 | use crate::engines::{HttpResponse, Response, CLIENT}; 5 | 6 | pub async fn request(response: &Response) -> Option { 7 | for search_result in response.search_results.iter().take(8) { 8 | if search_result.result.url.starts_with("https://docs.rs/") { 9 | return Some(CLIENT.get(search_result.result.url.as_str())); 10 | } 11 | } 12 | 13 | None 14 | } 15 | 16 | pub fn parse_response(HttpResponse { res, body, .. 
}: &HttpResponse) -> Option> { 17 | let url = res.url().clone(); 18 | 19 | let dom = Html::parse_document(body); 20 | 21 | let version = dom 22 | .select(&Selector::parse("h2 .version").unwrap()) 23 | .next()? 24 | .text() 25 | .collect::(); 26 | 27 | let page_title = dom 28 | .select(&Selector::parse("h1").unwrap()) 29 | .next()? 30 | .text() 31 | .collect::() 32 | .trim() 33 | .to_string(); 34 | 35 | let doc_query = Selector::parse(".docblock").unwrap(); 36 | 37 | let doc_html = dom 38 | .select(&doc_query) 39 | .next() 40 | .map(|doc| doc.inner_html()) 41 | .unwrap_or_default(); 42 | 43 | let item_decl = dom 44 | .select(&Selector::parse(".item-decl").unwrap()) 45 | .next() 46 | .map(|el| el.html()) 47 | .unwrap_or_default(); 48 | 49 | let doc_html = ammonia::Builder::default() 50 | .link_rel(None) 51 | .url_relative(ammonia::UrlRelative::RewriteWithBase(url.clone())) 52 | .clean(&format!("{item_decl}{doc_html}")) 53 | .to_string(); 54 | 55 | let (category, title) = page_title.split_once(' ').unwrap_or(("", &page_title)); 56 | 57 | let title_html = html! { 58 | h2 { 59 | (category) 60 | " " 61 | a href=(url) { (title) } 62 | @if category == "Crate" { 63 | span.infobox-docs_rs-version { (version) } 64 | } 65 | } 66 | }; 67 | 68 | Some(html! 
{ 69 | (title_html) 70 | div.infobox-docs_rs-doc { 71 | (PreEscaped(doc_html)) 72 | } 73 | }) 74 | } 75 | -------------------------------------------------------------------------------- /src/engines/postsearch/mdn.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | use scraper::{Html, Selector}; 3 | use serde::Deserialize; 4 | use tracing::error; 5 | 6 | use crate::engines::{Engine, HttpResponse, Response, CLIENT}; 7 | 8 | #[derive(Deserialize)] 9 | pub struct MdnConfig { 10 | pub max_sections: usize, 11 | } 12 | 13 | pub async fn request(response: &Response) -> Option { 14 | for search_result in response.search_results.iter().take(8) { 15 | if search_result 16 | .result 17 | .url 18 | .starts_with("https://developer.mozilla.org/en-US/docs/Web") 19 | { 20 | return Some(CLIENT.get(search_result.result.url.as_str())); 21 | } 22 | } 23 | 24 | None 25 | } 26 | 27 | pub fn parse_response( 28 | HttpResponse { res, body, config }: &HttpResponse, 29 | ) -> Option> { 30 | let config_toml = config.engines.get(Engine::Mdn).extra.clone(); 31 | let config: MdnConfig = match toml::Value::Table(config_toml).try_into() { 32 | Ok(args) => args, 33 | Err(err) => { 34 | error!("Failed to parse Mdn config: {err}"); 35 | return None; 36 | } 37 | }; 38 | 39 | let url = res.url().clone(); 40 | 41 | let dom = Html::parse_document(body); 42 | 43 | let page_title = dom 44 | .select(&Selector::parse("header > h1").unwrap()) 45 | .next()? 46 | .text() 47 | .collect::() 48 | .trim() 49 | .to_string(); 50 | 51 | let doc_query = Selector::parse(".section-content").unwrap(); 52 | 53 | let max_sections = if config.max_sections == 0 { 54 | usize::MAX 55 | } else { 56 | config.max_sections 57 | }; 58 | 59 | let doc_html = dom 60 | .select(&doc_query) 61 | .map(|doc| doc.inner_html()) 62 | .take(max_sections) 63 | .collect::>() 64 | .join("
"); 65 | 66 | let doc_html = ammonia::Builder::default() 67 | .link_rel(None) 68 | .url_relative(ammonia::UrlRelative::RewriteWithBase(url.clone())) 69 | .clean(&doc_html) 70 | .to_string(); 71 | 72 | Some(html! { 73 | h2 { 74 | a href=(url) { (page_title) } 75 | } 76 | div.infobox-mdn-article { 77 | (PreEscaped(doc_html)) 78 | } 79 | }) 80 | } 81 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "a cute metasearch engine"; 3 | 4 | inputs = { 5 | nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; 6 | 7 | crane.url = "github:ipetkov/crane"; 8 | 9 | flake-parts = { 10 | url = "github:hercules-ci/flake-parts"; 11 | inputs.nixpkgs-lib.follows = "nixpkgs"; 12 | }; 13 | }; 14 | 15 | outputs = inputs @ { 16 | self, 17 | crane, 18 | flake-parts, 19 | ... 20 | }: 21 | flake-parts.lib.mkFlake {inherit inputs;} { 22 | systems = ["x86_64-linux" "x86_64-darwin" "aarch64-darwin" "aarch64-linux"]; 23 | flake.nixosModules.default = import ./module.nix self; 24 | 25 | perSystem = { 26 | pkgs, 27 | system, 28 | ... 
29 | }: let 30 | craneLib = crane.mkLib pkgs; 31 | 32 | assetFilter = path: _type: (pkgs.lib.strings.hasPrefix (toString ./src/web/assets) path); 33 | sourceFilter = path: type: (craneLib.filterCargoSources path type) || (assetFilter path type); 34 | 35 | # Common arguments can be set here to avoid repeating them later 36 | # Note: changes here will rebuild all dependency crates 37 | commonArgs = { 38 | src = pkgs.lib.cleanSourceWith { 39 | src = ./.; 40 | filter = sourceFilter; 41 | name = "source"; # Be reproducible, regardless of the directory name 42 | }; 43 | strictDeps = true; 44 | 45 | buildInputs = [ 46 | # Add additional build inputs here 47 | ]; 48 | }; 49 | 50 | metasearch2 = craneLib.buildPackage (commonArgs 51 | // { 52 | cargoArtifacts = craneLib.buildDepsOnly commonArgs; 53 | 54 | # Additional environment variables or build phases/hooks can be set 55 | # here *without* rebuilding all dependency crates 56 | # MY_CUSTOM_VAR = "some value"; 57 | }); 58 | in { 59 | formatter = pkgs.alejandra; 60 | 61 | checks = { 62 | inherit metasearch2; 63 | }; 64 | 65 | packages.default = metasearch2; 66 | 67 | apps.default = { 68 | type = "app"; 69 | program = "${self.packages.${system}.default}/bin/metasearch"; 70 | }; 71 | 72 | devShells.default = craneLib.devShell { 73 | checks = self.checks.${system}; 74 | 75 | # Additional dev-shell environment variables can be set directly 76 | # MY_CUSTOM_DEVELOPMENT_VAR = "something else"; 77 | 78 | # Extra inputs can be added here; cargo and rustc are provided by default. 79 | packages = [ 80 | # pkgs.ripgrep 81 | ]; 82 | }; 83 | }; 84 | }; 85 | } 86 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | ========== 2 | metasearch 3 | ========== 4 | 5 | https://github.com/mat-1/metasearch2 6 | 7 | ---- 8 | INFO 9 | ---- 10 | 11 | metasearch (aka metasearch2) is a cute metasearch engine. 
It sources its results 12 | from Google, Bing, Brave, and several others. It's designed to be as lightweight 13 | as possible, both on the server and client. There is no required client-side 14 | JavaScript. 15 | 16 | There's a public demo instance at https://s.matdoes.dev, but please do not use 17 | it as your default or rely on it. This is so I don't get ratelimited by Google 18 | or other engines. Also, searches are logged. Run your own instance instead! 19 | 20 | ------------ 21 | INSTALLATION 22 | ------------ 23 | 24 | The easiest way to install metasearch is with `cargo install metasearch`. To get 25 | the unstable version with the latest features, you can install it with 26 | `cargo install --git https://github.com/mat-1/metasearch2`. 27 | 28 | Usage: `metasearch [config_file]` 29 | 30 | The config_file argument is optional; if it's not specified then it'll be 31 | checked at the following locations: 32 | 33 | - $XDG_CONFIG_HOME/metasearch/config.toml 34 | - $HOME/.config/metasearch/config.toml 35 | - ./config.toml 36 | 37 | If no config file exists, it'll be created at the first valid path in the list. 38 | 39 | By default, metasearch runs on port 28019. You are encouraged to use a reverse 40 | proxy. 41 | 42 | ------------- 43 | CONFIGURATION 44 | ------------- 45 | 46 | You can see all the default config options at `src/config.rs`. Some interesting 47 | options you may want to change are: 48 | 49 | - bind - the host and port that the web server runs on, defaults to 50 | `0.0.0.0:28019`. 51 | - api - whether your instance is accessible through a JSON API. See below for 52 | more details. 53 | - ui.stylesheet_url - a link to a stylesheet that will be loaded alongside the 54 | main one, for example `/themes/catppuccin-mocha.css`. 55 | - image_search.enabled - add a tab for viewing image results for your query. 56 | this is disabled by default as the image proxy could be used to make GET 57 | requests to arbitrary URLs from your server. 
58 | - engines.google.weight - the ranking score multiplier for an engine, you can 59 | modify this if you prefer the results from certain engines. 60 | 61 | -------- 62 | JSON API 63 | -------- 64 | 65 | metasearch has a JSON API that can be enabled by setting `api = true` in your 66 | config. To use it, set the `Accept: application/json` header in your requests. 67 | 68 | For example: 69 | curl 'http://localhost:28019/search?q=sandcats' -H 'Accept: application/json' 70 | 71 | The structure of the API is not guaranteed to be stable, as it relies on 72 | serializing internal structs. It may change without warning in the future. 73 | -------------------------------------------------------------------------------- /src/web/image_proxy.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use axum::{ 4 | extract::Query, 5 | http::StatusCode, 6 | response::{IntoResponse, Response}, 7 | Extension, 8 | }; 9 | use tracing::error; 10 | use wreq::header; 11 | 12 | use crate::{config::Config, engines}; 13 | 14 | pub async fn route( 15 | Query(params): Query>, 16 | Extension(config): Extension, 17 | ) -> Response { 18 | let image_search_config = &config.image_search; 19 | let proxy_config = &image_search_config.proxy; 20 | if !image_search_config.enabled || !proxy_config.enabled { 21 | return (StatusCode::FORBIDDEN, "Image proxy is disabled").into_response(); 22 | }; 23 | let url = params.get("url").cloned().unwrap_or_default(); 24 | if url.is_empty() { 25 | return (StatusCode::BAD_REQUEST, "Missing `url` parameter").into_response(); 26 | } 27 | 28 | let mut res = match engines::CLIENT 29 | .get(&url) 30 | .header("accept", "image/*") 31 | .send() 32 | .await 33 | { 34 | Ok(res) => res, 35 | Err(err) => { 36 | error!("Image proxy error for {url}: {err}"); 37 | return (StatusCode::INTERNAL_SERVER_ERROR, "Image proxy error").into_response(); 38 | } 39 | }; 40 | 41 | let max_size = 
proxy_config.max_download_size; 42 | 43 | if res.content_length().unwrap_or_default() > max_size { 44 | return (StatusCode::PAYLOAD_TOO_LARGE, "Image too large").into_response(); 45 | } 46 | 47 | const ALLOWED_IMAGE_TYPES: &[&str] = &["apng", "avif", "gif", "jpeg", "png", "webp"]; 48 | 49 | // validate content-type 50 | let content_type = res 51 | .headers() 52 | .get(wreq::header::CONTENT_TYPE) 53 | .and_then(|v| v.to_str().ok()) 54 | .unwrap_or_default() 55 | .to_string(); 56 | 57 | let Some((base_type, subtype)) = content_type.split_once("/") else { 58 | return (StatusCode::UNSUPPORTED_MEDIA_TYPE, "Invalid Content-Type").into_response(); 59 | }; 60 | if base_type != "image" { 61 | return (StatusCode::UNSUPPORTED_MEDIA_TYPE, "Not an image").into_response(); 62 | } 63 | if !ALLOWED_IMAGE_TYPES.contains(&subtype) { 64 | return (StatusCode::UNSUPPORTED_MEDIA_TYPE, "Image type not allowed").into_response(); 65 | } 66 | 67 | let mut image_bytes = Vec::new(); 68 | while let Ok(Some(chunk)) = res.chunk().await { 69 | image_bytes.extend_from_slice(&chunk); 70 | if image_bytes.len() as u64 > max_size { 71 | return (StatusCode::PAYLOAD_TOO_LARGE, "Image too large").into_response(); 72 | } 73 | } 74 | 75 | ( 76 | [ 77 | (header::CONTENT_TYPE, content_type), 78 | (header::CACHE_CONTROL, "public, max-age=31536000".to_owned()), 79 | (header::X_CONTENT_TYPE_OPTIONS, "nosniff".to_owned()), 80 | (header::CONTENT_DISPOSITION, "attachment".to_owned()), 81 | ], 82 | image_bytes, 83 | ) 84 | .into_response() 85 | } 86 | -------------------------------------------------------------------------------- /src/web/search/all.rs: -------------------------------------------------------------------------------- 1 | //! Rendering results in the "all" tab. 
2 | 3 | use maud::{html, PreEscaped}; 4 | 5 | use crate::{ 6 | config::Config, 7 | engines::{self, EngineSearchResult, Infobox, Response}, 8 | web::search::render_engine_list, 9 | }; 10 | 11 | pub fn render_results(response: Response) -> PreEscaped { 12 | let mut html = String::new(); 13 | if let Some(answer) = &response.answer { 14 | html.push_str( 15 | &html! { 16 | div.answer { 17 | (answer.html) 18 | (render_engine_list(&[answer.engine], &response.config)) 19 | } 20 | } 21 | .into_string(), 22 | ); 23 | } 24 | if let Some(infobox) = &response.infobox { 25 | html.push_str( 26 | &html! { 27 | div.infobox { 28 | (infobox.html) 29 | (render_engine_list(&[infobox.engine], &response.config)) 30 | } 31 | } 32 | .into_string(), 33 | ); 34 | } 35 | if let Some(featured_snippet) = &response.featured_snippet { 36 | html.push_str(&render_featured_snippet(featured_snippet, &response.config).into_string()); 37 | } 38 | for result in &response.search_results { 39 | html.push_str(&render_search_result(result, &response.config).into_string()); 40 | } 41 | 42 | if html.is_empty() { 43 | html.push_str( 44 | &html! { 45 | p { "No results." } 46 | } 47 | .into_string(), 48 | ); 49 | } 50 | 51 | PreEscaped(html) 52 | } 53 | 54 | fn render_search_result( 55 | result: &engines::SearchResult, 56 | config: &Config, 57 | ) -> PreEscaped { 58 | html! { 59 | div.search-result { 60 | a.search-result-anchor rel="noreferrer" href=(result.result.url) { 61 | span.search-result-url { (result.result.url) } 62 | h3.search-result-title { (result.result.title) } 63 | } 64 | p.search-result-description { (result.result.description) } 65 | (render_engine_list(&result.engines.iter().copied().collect::>(), config)) 66 | } 67 | } 68 | } 69 | 70 | fn render_featured_snippet( 71 | featured_snippet: &engines::FeaturedSnippet, 72 | config: &Config, 73 | ) -> PreEscaped { 74 | html! 
{ 75 | div.featured-snippet { 76 | p.search-result-description { (featured_snippet.description) } 77 | a.search-result-anchor rel="noreferrer" href=(featured_snippet.url) { 78 | span.search-result-url { (featured_snippet.url) } 79 | h3.search-result-title { (featured_snippet.title) } 80 | } 81 | (render_engine_list(&[featured_snippet.engine], config)) 82 | } 83 | } 84 | } 85 | 86 | pub fn render_infobox(infobox: &Infobox, config: &Config) -> PreEscaped { 87 | html! { 88 | div.infobox.postsearch-infobox { 89 | (infobox.html) 90 | (render_engine_list(&[infobox.engine], config)) 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/engines/answer/wikipedia.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use maud::html; 4 | use serde::Deserialize; 5 | use url::Url; 6 | 7 | use crate::engines::{EngineResponse, RequestResponse, CLIENT}; 8 | 9 | use super::colorpicker; 10 | 11 | pub async fn request(mut query: &str) -> RequestResponse { 12 | if !colorpicker::MatchedColorModel::new(query).is_empty() { 13 | // "color picker" is a wikipedia article but we only want to show the 14 | // actual color picker answer 15 | return RequestResponse::None; 16 | } 17 | 18 | // adding "wikipedia" to the start or end of your query is common when you 19 | // want to get a wikipedia article 20 | if let Some(stripped_query) = query.strip_suffix(" wikipedia") { 21 | query = stripped_query 22 | } else if let Some(stripped_query) = query.strip_prefix("wikipedia ") { 23 | query = stripped_query 24 | } 25 | 26 | CLIENT 27 | .get( 28 | Url::parse_with_params( 29 | "https://en.wikipedia.org/w/api.php", 30 | &[ 31 | ("format", "json"), 32 | ("action", "query"), 33 | ("prop", "extracts|pageimages"), 34 | ("exintro", ""), 35 | ("explaintext", ""), 36 | ("redirects", "1"), 37 | ("exsentences", "2"), 38 | ("titles", query), 39 | ], 40 | ) 41 | .unwrap(), 42 | ) 43 | 
.into() 44 | } 45 | 46 | #[derive(Debug, Deserialize)] 47 | pub struct WikipediaResponse { 48 | pub batchcomplete: String, 49 | pub query: WikipediaQuery, 50 | } 51 | 52 | #[derive(Debug, Deserialize)] 53 | pub struct WikipediaQuery { 54 | pub pages: HashMap, 55 | } 56 | 57 | #[derive(Debug, Deserialize)] 58 | pub struct WikipediaPage { 59 | pub pageid: u64, 60 | pub ns: u64, 61 | pub title: String, 62 | pub extract: String, 63 | pub thumbnail: Option, 64 | } 65 | 66 | #[derive(Debug, Deserialize)] 67 | pub struct WikipediaThumbnail { 68 | pub source: String, 69 | pub width: u64, 70 | pub height: u64, 71 | } 72 | 73 | pub fn parse_response(body: &str) -> eyre::Result { 74 | let Ok(res) = serde_json::from_str::(body) else { 75 | return Ok(EngineResponse::new()); 76 | }; 77 | 78 | let pages: Vec<(String, WikipediaPage)> = res.query.pages.into_iter().collect(); 79 | 80 | if pages.is_empty() || pages[0].0 == "-1" { 81 | return Ok(EngineResponse::new()); 82 | } 83 | 84 | let page = &pages[0].1; 85 | let WikipediaPage { 86 | pageid: _, 87 | ns: _, 88 | title, 89 | extract, 90 | thumbnail: _, 91 | } = page; 92 | if extract.ends_with(':') { 93 | return Ok(EngineResponse::new()); 94 | } 95 | 96 | let mut previous_extract = String::new(); 97 | let mut extract = extract.clone(); 98 | while previous_extract != extract { 99 | previous_extract.clone_from(&extract); 100 | extract = extract 101 | .replace("( ", "(") 102 | .replace("(, ", "(") 103 | .replace("(; ", "(") 104 | .replace(" ()", "") 105 | .replace("()", ""); 106 | } 107 | 108 | let page_title = title.replace(' ', "_"); 109 | let page_url = format!("https://en.wikipedia.org/wiki/{page_title}"); 110 | 111 | Ok(EngineResponse::infobox_html(html! 
{ 112 | a href=(page_url) { 113 | h2 { (title) } 114 | } 115 | p { (extract) } 116 | })) 117 | } 118 | -------------------------------------------------------------------------------- /src/web/settings.rs: -------------------------------------------------------------------------------- 1 | use axum::{ 2 | http::{header, HeaderMap, StatusCode}, 3 | response::{IntoResponse, Response}, 4 | Extension, Form, 5 | }; 6 | use axum_extra::extract::{cookie::Cookie, CookieJar}; 7 | use maud::{html, Markup, PreEscaped, DOCTYPE}; 8 | use serde::{Deserialize, Serialize}; 9 | 10 | use crate::{config::Config, web::head_html}; 11 | 12 | pub async fn get(Extension(config): Extension) -> impl IntoResponse { 13 | let theme_option = |value: &str, name: &str| -> Markup { 14 | let selected = config.ui.stylesheet_url == value; 15 | html! { 16 | option value=(value) selected[selected] { 17 | { (name) } 18 | } 19 | } 20 | }; 21 | 22 | let html = html! { 23 | (PreEscaped("\n")) 24 | (DOCTYPE) 25 | html lang="en" { 26 | {(head_html(Some("settings"), &config))} 27 | body { 28 | div.main-container.settings-page { 29 | main { 30 | a.back-to-index-button href="/" { "Back" } 31 | h1 { "Settings" } 32 | form.settings-form method="post" { 33 | label for="theme" { "Theme" } 34 | select name="stylesheet-url" selected=(config.ui.stylesheet_url) { 35 | { (theme_option("", "Ayu Dark")) } 36 | { (theme_option("/themes/catppuccin-mocha.css", "Catppuccin Mocha")) } 37 | { (theme_option("/themes/catppuccin-macchiato.css", "Catppuccin Macchiato")) } 38 | { (theme_option("/themes/catppuccin-latte.css", "Catppuccin Latte")) } 39 | { (theme_option("/themes/nord-bluish.css", "Nord Bluish")) } 40 | { (theme_option("/themes/discord.css", "Discord")) } 41 | } 42 | 43 | br; 44 | 45 | // custom css textarea 46 | details #custom-css-details { 47 | summary { "Custom CSS" } 48 | textarea #custom-css name="stylesheet-str" { 49 | { (config.ui.stylesheet_str) } 50 | } 51 | } 52 | 53 | input #save-settings-button 
type="submit" value="Save"; 54 | } 55 | } 56 | } 57 | } 58 | } 59 | } 60 | .into_string(); 61 | 62 | ([(header::CONTENT_TYPE, "text/html; charset=utf-8")], html) 63 | } 64 | 65 | #[derive(Serialize, Deserialize)] 66 | #[serde(rename_all = "kebab-case")] 67 | pub struct Settings { 68 | pub stylesheet_url: String, 69 | pub stylesheet_str: String, 70 | } 71 | 72 | pub async fn post( 73 | headers: HeaderMap, 74 | mut jar: CookieJar, 75 | Form(settings): Form, 76 | ) -> Response { 77 | let Some(origin) = headers.get("origin").and_then(|h| h.to_str().ok()) else { 78 | return (StatusCode::BAD_REQUEST, "Missing or invalid Origin header").into_response(); 79 | }; 80 | let Some(host) = headers.get("host").and_then(|h| h.to_str().ok()) else { 81 | return (StatusCode::BAD_REQUEST, "Missing or invalid Host header").into_response(); 82 | }; 83 | if origin != format!("http://{host}") && origin != format!("https://{host}") { 84 | return (StatusCode::BAD_REQUEST, "Origin does not match Host").into_response(); 85 | } 86 | 87 | let mut settings_cookie = Cookie::new("settings", serde_json::to_string(&settings).unwrap()); 88 | settings_cookie.make_permanent(); 89 | jar = jar.add(settings_cookie); 90 | 91 | (StatusCode::FOUND, [(header::LOCATION, "/settings")], jar).into_response() 92 | } 93 | -------------------------------------------------------------------------------- /src/engines/postsearch/github.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | use scraper::{Html, Selector}; 3 | use url::Url; 4 | 5 | use crate::engines::{answer::regex, Response, CLIENT}; 6 | 7 | pub async fn request(response: &Response) -> Option { 8 | for search_result in response.search_results.iter().take(8) { 9 | if regex!(r"^https:\/\/github\.com\/[\w-]+\/[\w.-]+$").is_match(&search_result.result.url) { 10 | return Some(CLIENT.get(search_result.result.url.as_str())); 11 | } 12 | } 13 | 14 | None 15 | } 16 | 17 | pub fn 
parse_response(body: &str) -> Option> { 18 | let dom = Html::parse_document(body); 19 | 20 | let url_relative = dom 21 | .select( 22 | &Selector::parse("main #repository-container-header strong[itemprop='name'] > a") 23 | .unwrap(), 24 | ) 25 | .next()? 26 | .value() 27 | .attr("href")?; 28 | let url = format!("https://github.com{url_relative}"); 29 | 30 | let embedded_data_script = dom 31 | .select(&Selector::parse("script[data-target='react-partial.embeddedData']").unwrap()) 32 | .next_back()? 33 | .inner_html(); 34 | let embedded_data = serde_json::from_str::(&embedded_data_script).ok()?; 35 | let readme_html = embedded_data 36 | .get("props")? 37 | .get("initialPayload")? 38 | .get("overview")? 39 | .get("overviewFiles")? 40 | .as_array()? 41 | .first()? 42 | .get("richText")? 43 | .as_str()?; 44 | 45 | let mut readme_html = ammonia::Builder::default() 46 | .link_rel(None) 47 | .add_allowed_classes("div", &["markdown-alert"]) 48 | .add_allowed_classes("p", &["markdown-alert-title"]) 49 | .url_relative(ammonia::UrlRelative::RewriteWithBase( 50 | Url::parse("https://github.com").unwrap(), 51 | )) 52 | .clean(readme_html) 53 | .to_string(); 54 | 55 | let readme_dom = Html::parse_fragment(&readme_html); 56 | let mut readme_element = readme_dom.root_element(); 57 | 58 | let mut is_readme_element_pre = false; 59 | 60 | while readme_element.children().count() == 1 { 61 | // if the readme is wrapped in
, remove that 62 | if let Some(article) = readme_element 63 | .select(&Selector::parse("article").unwrap()) 64 | .next() 65 | { 66 | readme_element = article; 67 | } 68 | // useless div 69 | else if let Some(div) = readme_element 70 | .select(&Selector::parse("div").unwrap()) 71 | .next() 72 | { 73 | readme_element = div; 74 | // useless pre 75 | } else if let Some(pre) = readme_element 76 | .select(&Selector::parse("pre").unwrap()) 77 | .next() 78 | { 79 | readme_element = pre; 80 | is_readme_element_pre = true; 81 | } else { 82 | break; 83 | } 84 | } 85 | 86 | readme_html = readme_element.inner_html().to_string(); 87 | 88 | let title = if let Some(title_el) = readme_dom 89 | // github wraps their h1s in a
90 | .select(&Selector::parse("div:has(h1)").unwrap()) 91 | .next() 92 | { 93 | // if the readme starts with an h1, remove it 94 | let title_html = title_el.html().trim().to_string(); 95 | 96 | if readme_html.starts_with(&title_html) { 97 | readme_html = readme_html[title_html.len()..].to_string(); 98 | } 99 | title_el.text().collect::() 100 | } else { 101 | dom.select( 102 | &Selector::parse("main #repository-container-header strong[itemprop='name'] > a") 103 | .unwrap(), 104 | ) 105 | .next()? 106 | .text() 107 | .collect::() 108 | }; 109 | 110 | Some(html! { 111 | a href=(url) { 112 | h1 { (title) } 113 | } 114 | @if is_readme_element_pre { 115 | pre.infobox-github-readme { 116 | (PreEscaped(readme_html)) 117 | } 118 | } @else { 119 | div.infobox-github-readme { 120 | (PreEscaped(readme_html)) 121 | } 122 | } 123 | }) 124 | } 125 | -------------------------------------------------------------------------------- /src/web/mod.rs: -------------------------------------------------------------------------------- 1 | mod autocomplete; 2 | mod image_proxy; 3 | mod index; 4 | mod opensearch; 5 | mod search; 6 | mod settings; 7 | 8 | use std::{convert::Infallible, net::SocketAddr, sync::Arc}; 9 | 10 | use axum::{ 11 | extract::{Request, State}, 12 | http::{header, StatusCode}, 13 | middleware::{self, Next}, 14 | response::Response, 15 | routing::{get, post, MethodRouter}, 16 | Router, 17 | }; 18 | use axum_extra::extract::CookieJar; 19 | use maud::{html, Markup, PreEscaped}; 20 | use tracing::info; 21 | 22 | use crate::config::Config; 23 | 24 | macro_rules! 
register_static_routes { 25 | ( $app:ident, $( $x:expr ),* ) => { 26 | { 27 | $( 28 | let $app = $app.route( 29 | concat!("/", $x), 30 | static_route( 31 | include_str!(concat!("assets/", $x)), 32 | guess_mime_type($x) 33 | ), 34 | ); 35 | )* 36 | 37 | $app 38 | } 39 | }; 40 | } 41 | 42 | pub async fn run(config: Config) { 43 | let bind_addr = config.bind; 44 | 45 | let config = Arc::new(config); 46 | 47 | fn static_route( 48 | content: &'static str, 49 | content_type: &'static str, 50 | ) -> MethodRouter 51 | where 52 | S: Clone + Send + Sync + 'static, 53 | { 54 | let response = ([(header::CONTENT_TYPE, content_type)], content); 55 | get(|| async { response }) 56 | } 57 | 58 | let app = Router::new() 59 | .route("/", get(index::get)) 60 | .route("/search", get(search::get)) 61 | .route("/settings", get(settings::get)) 62 | .route("/settings", post(settings::post)) 63 | .route("/opensearch.xml", get(opensearch::route)) 64 | .route("/autocomplete", get(autocomplete::route)) 65 | .route("/image-proxy", get(image_proxy::route)) 66 | .layer(middleware::from_fn_with_state( 67 | config.clone(), 68 | config_middleware, 69 | )) 70 | .with_state(config); 71 | let app = register_static_routes![ 72 | app, 73 | "style.css", 74 | "script.js", 75 | "robots.txt", 76 | "scripts/colorpicker.js", 77 | "themes/catppuccin-mocha.css", 78 | "themes/catppuccin-macchiato.css", 79 | "themes/catppuccin-latte.css", 80 | "themes/nord-bluish.css", 81 | "themes/discord.css" 82 | ]; 83 | 84 | info!("Listening on http://{bind_addr}"); 85 | 86 | let listener = tokio::net::TcpListener::bind(bind_addr).await.unwrap(); 87 | axum::serve( 88 | listener, 89 | app.into_make_service_with_connect_info::(), 90 | ) 91 | .await 92 | .unwrap(); 93 | } 94 | 95 | fn guess_mime_type(path: &str) -> &'static str { 96 | match path.rsplit('.').next() { 97 | Some("css") => "text/css; charset=utf-8", 98 | Some("js") => "text/javascript; charset=utf-8", 99 | Some("txt") => "text/plain; charset=utf-8", 100 | _ => 
"text/plain; charset=utf-8", 101 | } 102 | } 103 | 104 | async fn config_middleware( 105 | State(config): State>, 106 | cookies: CookieJar, 107 | mut req: Request, 108 | next: Next, 109 | ) -> Result { 110 | let mut config = config.clone().as_ref().clone(); 111 | 112 | let settings_cookie = cookies.get("settings"); 113 | if let Some(settings_cookie) = settings_cookie { 114 | if let Ok(settings) = serde_json::from_str::(settings_cookie.value()) { 115 | config.ui.stylesheet_url = settings.stylesheet_url; 116 | config.ui.stylesheet_str = settings.stylesheet_str; 117 | } 118 | } 119 | 120 | // modify the state 121 | req.extensions_mut().insert(config); 122 | 123 | Ok(next.run(req).await) 124 | } 125 | 126 | pub fn head_html(title: Option<&str>, config: &Config) -> Markup { 127 | html! { 128 | head { 129 | meta charset="UTF-8"; 130 | meta name="viewport" content="width=device-width, initial-scale=1.0"; 131 | title { 132 | @if let Some(title) = title { 133 | { (title) } 134 | { " - " } 135 | } 136 | {(config.ui.site_name)} 137 | } 138 | link rel="stylesheet" href="/style.css"; 139 | @if !config.ui.stylesheet_url.is_empty() { 140 | link rel="stylesheet" href=(config.ui.stylesheet_url); 141 | } 142 | @if !config.ui.stylesheet_str.is_empty() { 143 | style { (PreEscaped(html_escape::encode_style(&config.ui.stylesheet_str))) } 144 | } 145 | @if !config.ui.favicon_url.is_empty() { 146 | link rel="icon" href=(config.ui.favicon_url); 147 | } 148 | script src="/script.js" defer {} 149 | link rel="search" type="application/opensearchdescription+xml" title="metasearch" href="/opensearch.xml"; 150 | } 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/engines/answer/dictionary.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use eyre::eyre; 4 | use maud::{html, PreEscaped}; 5 | use serde::Deserialize; 6 | use url::Url; 7 | 8 | use 
crate::engines::{EngineResponse, HttpResponse, RequestResponse, CLIENT}; 9 | 10 | use super::regex; 11 | 12 | pub async fn request(query: &str) -> RequestResponse { 13 | // if the query starts with "define " then use that, otherwise abort 14 | let re = regex!(r"^define\s+(\w+)$"); 15 | let query = match re.captures(query) { 16 | Some(caps) => caps.get(1).unwrap().as_str(), 17 | None => return RequestResponse::None, 18 | } 19 | .to_lowercase(); 20 | 21 | CLIENT 22 | .get( 23 | Url::parse( 24 | format!( 25 | "https://en.wiktionary.org/api/rest_v1/page/definition/{}", 26 | urlencoding::encode(&query) 27 | ) 28 | .as_str(), 29 | ) 30 | .unwrap(), 31 | ) 32 | .into() 33 | } 34 | 35 | #[derive(Debug, Deserialize)] 36 | pub struct WiktionaryResponse(pub HashMap>); 37 | 38 | #[derive(Debug, Deserialize)] 39 | #[serde(rename_all = "camelCase")] 40 | pub struct WiktionaryEntry { 41 | pub part_of_speech: String, 42 | pub language: String, 43 | pub definitions: Vec, 44 | } 45 | 46 | #[derive(Debug, Deserialize)] 47 | #[serde(rename_all = "camelCase")] 48 | pub struct WiktionaryDefinition { 49 | pub definition: String, 50 | #[serde(default)] 51 | pub examples: Vec, 52 | } 53 | 54 | pub fn parse_response( 55 | HttpResponse { res, body, .. }: &HttpResponse, 56 | ) -> eyre::Result { 57 | let url = res.url(); 58 | 59 | let Ok(res) = serde_json::from_str::(body) else { 60 | return Ok(EngineResponse::new()); 61 | }; 62 | 63 | let mediawiki_key = url 64 | .path_segments() 65 | .ok_or_else(|| eyre!("url has no path segments"))? 
66 | .next_back() 67 | .ok_or_else(|| eyre!("url has no last path segment"))?; 68 | 69 | let word = key_to_title(mediawiki_key); 70 | 71 | let Some(entries) = res.0.get("en") else { 72 | return Ok(EngineResponse::new()); 73 | }; 74 | 75 | let mut cleaner = ammonia::Builder::default(); 76 | cleaner 77 | .link_rel(None) 78 | .url_relative(ammonia::UrlRelative::RewriteWithBase( 79 | Url::parse("https://en.wiktionary.org").unwrap(), 80 | )); 81 | 82 | let mut html = String::new(); 83 | 84 | html.push_str( 85 | &html! { 86 | h2.answer-dictionary-word { 87 | a href={ "https://en.wiktionary.org/wiki/" (mediawiki_key) } { 88 | (word) 89 | } 90 | } 91 | } 92 | .into_string(), 93 | ); 94 | 95 | for entry in entries { 96 | html.push_str( 97 | &html! { 98 | span.answer-dictionary-part-of-speech { 99 | (entry.part_of_speech.to_lowercase()) 100 | } 101 | } 102 | .into_string(), 103 | ); 104 | 105 | html.push_str("
    "); 106 | let mut previous_definitions = Vec::::new(); 107 | for definition in &entry.definitions { 108 | if definition.definition.is_empty() { 109 | // wiktionary does this sometimes, for example https://en.wiktionary.org/api/rest_v1/page/definition/variance 110 | continue; 111 | } 112 | if previous_definitions 113 | .iter() 114 | .any(|d| d.contains(&definition.definition)) 115 | { 116 | // wiktionary will sometimes duplicate definitions, for example https://en.wiktionary.org/api/rest_v1/page/definition/google 117 | continue; 118 | } 119 | previous_definitions.push(definition.definition.clone()); 120 | 121 | html.push_str("
  1. "); 122 | let definition_html = cleaner 123 | .clean(&definition.definition.replace('“', "\"")) 124 | .to_string(); 125 | 126 | html.push_str(&html! { p { (PreEscaped(definition_html)) } }.into_string()); 127 | 128 | if !definition.examples.is_empty() { 129 | for example in &definition.examples { 130 | let example_html = cleaner.clean(example).to_string(); 131 | html.push_str( 132 | &html! { 133 | blockquote.answer-dictionary-example { 134 | (PreEscaped(example_html)) 135 | } 136 | } 137 | .into_string(), 138 | ); 139 | } 140 | } 141 | html.push_str("
  2. "); 142 | } 143 | html.push_str("
"); 144 | } 145 | 146 | Ok(EngineResponse::answer_html(PreEscaped(html))) 147 | } 148 | 149 | fn key_to_title(key: &str) -> String { 150 | // https://github.com/wikimedia/mediawiki-title 151 | // In general, the page title is converted to the mediawiki DB key format by 152 | // trimming spaces, replacing whitespace symbols to underscores and applying 153 | // wiki-specific capitalization rules. 154 | 155 | let title = key.trim().replace('_', " "); 156 | let mut c = title.chars(); 157 | match c.next() { 158 | None => String::new(), 159 | Some(f) => f.to_uppercase().chain(c).collect(), 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/engines/macros.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! engines { 3 | ($($engine:ident = $id:expr),* $(,)?) => { 4 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)] 5 | pub enum Engine { 6 | $($engine,)* 7 | } 8 | 9 | impl Engine { 10 | #[must_use] 11 | pub fn all() -> &'static [Engine] { 12 | &[$(Engine::$engine,)*] 13 | } 14 | 15 | #[must_use] 16 | pub fn id(&self) -> &'static str { 17 | match self { 18 | $(Engine::$engine => $id,)* 19 | } 20 | } 21 | } 22 | 23 | impl FromStr for Engine { 24 | type Err = (); 25 | 26 | fn from_str(s: &str) -> Result { 27 | match s { 28 | $($id => Ok(Engine::$engine),)* 29 | _ => Err(()), 30 | } 31 | } 32 | } 33 | }; 34 | } 35 | 36 | #[macro_export] 37 | macro_rules! engine_parse_response { 38 | ($res:ident, $module:ident::$engine_id:ident::None) => { 39 | None 40 | }; 41 | ($res:ident, $module:ident::$engine_id:ident::$parse_response:ident) => { 42 | Some($module::$engine_id::$parse_response($res.into())) 43 | }; 44 | } 45 | 46 | #[macro_export] 47 | macro_rules! engine_requests { 48 | ($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) 
=> { 49 | impl Engine { 50 | pub async fn request(&self, query: &SearchQuery) -> eyre::Result { 51 | #[allow(clippy::useless_conversion)] 52 | match self { 53 | $( 54 | Engine::$engine => $module::$engine_id::$request(query).await.into_request_response_result(), 55 | )* 56 | _ => Ok(RequestResponse::None), 57 | } 58 | } 59 | 60 | #[tracing::instrument(skip(self, res), fields(engine = %self))] 61 | pub fn parse_response(&self, res: &HttpResponse) -> eyre::Result { 62 | #[allow(clippy::useless_conversion)] 63 | match self { 64 | $( 65 | Engine::$engine => $crate::engine_parse_response! { res, $module::$engine_id::$parse_response } 66 | .ok_or_else(|| eyre::eyre!("engine {self:?} can't parse response"))?, 67 | )* 68 | _ => eyre::bail!("engine {self:?} can't parse response"), 69 | } 70 | } 71 | } 72 | }; 73 | } 74 | 75 | #[macro_export] 76 | macro_rules! engine_autocomplete_requests { 77 | ($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) => { 78 | impl Engine { 79 | #[must_use] 80 | pub fn request_autocomplete(&self, query: &str) -> Option { 81 | match self { 82 | $( 83 | Engine::$engine => Some($module::$engine_id::$request(query).into()), 84 | )* 85 | _ => None, 86 | } 87 | } 88 | 89 | pub fn parse_autocomplete_response(&self, body: &str) -> eyre::Result> { 90 | match self { 91 | $( 92 | Engine::$engine => $crate::engine_parse_response! { body, $module::$engine_id::$parse_response } 93 | .ok_or_else(|| eyre::eyre!("engine {self:?} can't parse autocomplete response"))?, 94 | )* 95 | _ => eyre::bail!("engine {self:?} can't parse autocomplete response"), 96 | } 97 | } 98 | } 99 | }; 100 | } 101 | 102 | #[macro_export] 103 | macro_rules! engine_postsearch_requests { 104 | ($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) 
=> { 105 | impl Engine { 106 | #[must_use] 107 | pub async fn postsearch_request(&self, response: &Response) -> Option { 108 | match self { 109 | $( 110 | Engine::$engine => $module::$engine_id::$request(response).await, 111 | )* 112 | _ => None, 113 | } 114 | } 115 | 116 | #[must_use] 117 | pub fn postsearch_parse_response(&self, res: &HttpResponse) -> Option> { 118 | match self { 119 | $( 120 | Engine::$engine => $crate::engine_parse_response! { res, $module::$engine_id::$parse_response }?, 121 | )* 122 | _ => None, 123 | } 124 | } 125 | } 126 | }; 127 | } 128 | 129 | #[macro_export] 130 | macro_rules! engine_image_requests { 131 | ($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) => { 132 | impl Engine { 133 | #[must_use] 134 | pub fn request_images(&self, query: &SearchQuery) -> RequestResponse { 135 | match self { 136 | $( 137 | Engine::$engine => $module::$engine_id::$request(query).into(), 138 | )* 139 | _ => RequestResponse::None, 140 | } 141 | } 142 | 143 | pub fn parse_images_response(&self, res: &HttpResponse) -> eyre::Result { 144 | #[allow(clippy::useless_conversion)] 145 | match self { 146 | $( 147 | Engine::$engine => $crate::engine_parse_response! 
{ res, $module::$engine_id::$parse_response } 148 | .ok_or_else(|| eyre::eyre!("engine {self:?} can't parse images response"))?, 149 | )* 150 | _ => eyre::bail!("engine {self:?} can't parse response"), 151 | } 152 | } 153 | } 154 | }; 155 | } 156 | -------------------------------------------------------------------------------- /src/engines/answer/timezone.rs: -------------------------------------------------------------------------------- 1 | use chrono::{DateTime, TimeZone}; 2 | use chrono_tz::{OffsetComponents, Tz}; 3 | use maud::html; 4 | 5 | use crate::engines::EngineResponse; 6 | 7 | use super::regex; 8 | 9 | pub async fn request(query: &str) -> EngineResponse { 10 | match evaluate(query) { 11 | None => EngineResponse::new(), 12 | Some(TimeResponse::Current { time, timezone }) => EngineResponse::answer_html(html! { 13 | p.answer-query { "Current time in " (timezone_to_string(timezone)) } 14 | h3 { 15 | b { (time.format("%-I:%M %P")) } 16 | span.answer-comment { 17 | " (" (time.format("%B %-d")) ")" 18 | } 19 | } 20 | }), 21 | Some(TimeResponse::Conversion { 22 | source_timezone, 23 | target_timezone, 24 | source_time, 25 | target_time, 26 | source_offset, 27 | target_offset, 28 | }) => { 29 | let delta_minutes = (target_offset - source_offset).num_minutes(); 30 | let delta = if delta_minutes % 60 == 0 { 31 | format!("{:+}", delta_minutes / 60) 32 | } else { 33 | format!("{:+}:{}", delta_minutes / 60, delta_minutes % 60) 34 | }; 35 | 36 | EngineResponse::answer_html(html! 
{ 37 | p.answer-query { 38 | (source_time.format("%-I:%M %P")) 39 | " " 40 | (timezone_to_string(source_timezone)) 41 | " to " 42 | (timezone_to_string(target_timezone)) 43 | } 44 | h3 { 45 | b { (target_time.format("%-I:%M %P")) } 46 | " " 47 | span.answer-comment { 48 | (timezone_to_string(target_timezone)) " (" (delta) ")" 49 | } 50 | } 51 | }) 52 | } 53 | } 54 | } 55 | 56 | #[derive(Debug)] 57 | enum TimeResponse { 58 | Current { 59 | time: DateTime, 60 | timezone: Tz, 61 | }, 62 | Conversion { 63 | source_timezone: Tz, 64 | target_timezone: Tz, 65 | source_time: DateTime, 66 | target_time: DateTime, 67 | source_offset: chrono::Duration, 68 | target_offset: chrono::Duration, 69 | }, 70 | } 71 | 72 | fn evaluate(query: &str) -> Option { 73 | // "4pm utc to cst" 74 | let re = regex!(r"(\d{1,2})(?:(\d{1,2}))?\s*(am|pm|) ([\w/+\-]+) (to|as|in) ([\w/+\-]+)"); 75 | if let Some(captures) = re.captures(query) { 76 | if let Some(hour) = captures.get(1).map(|m| m.as_str().parse::().unwrap()) { 77 | let minute = match captures.get(2) { 78 | Some(m) => m.as_str().parse::().ok()?, 79 | None => 0, 80 | }; 81 | let ampm = captures.get(3).unwrap().as_str(); 82 | let timezone1_name = captures.get(4).unwrap().as_str(); 83 | let timezone2_name = captures.get(6).unwrap().as_str(); 84 | 85 | let source_timezone = parse_timezone(timezone1_name)?; 86 | let target_timezone = parse_timezone(timezone2_name)?; 87 | 88 | let current_date = chrono::Utc::now().date_naive(); 89 | 90 | let source_offset = source_timezone.offset_from_utc_date(¤t_date); 91 | let target_offset = target_timezone.offset_from_utc_date(¤t_date); 92 | 93 | let source_time_naive = current_date.and_hms_opt( 94 | if ampm == "pm" && hour != 12 { 95 | hour + 12 96 | } else if ampm == "am" && hour == 12 { 97 | 0 98 | } else { 99 | hour 100 | }, 101 | minute, 102 | 0, 103 | )?; 104 | let source_time_utc = chrono::Utc 105 | .from_local_datetime(&source_time_naive) 106 | .latest()? 
107 | - (source_offset.base_utc_offset() + source_offset.dst_offset()); 108 | 109 | let source_time = source_time_utc.with_timezone(&source_timezone); 110 | let target_time = source_time_utc.with_timezone(&target_timezone); 111 | 112 | return Some(TimeResponse::Conversion { 113 | source_timezone, 114 | target_timezone, 115 | source_time, 116 | target_time, 117 | source_offset: source_offset.base_utc_offset(), 118 | target_offset: target_offset.base_utc_offset(), 119 | }); 120 | } 121 | } 122 | 123 | // "utc time" 124 | let re = regex!(r"([\w/+\-]+)(?: current)? time$"); 125 | // "time in utc" 126 | let re2 = regex!(r"time (?:in|as) ([\w/+\-]+)$"); 127 | if let Some(timezone_name) = re 128 | .captures(query) 129 | .and_then(|m| m.get(1)) 130 | .or_else(|| re2.captures(query).and_then(|m| m.get(1))) 131 | { 132 | if let Some(timezone) = parse_timezone(timezone_name.as_str()) { 133 | let time = chrono::Utc::now().with_timezone(&timezone); 134 | return Some(TimeResponse::Current { time, timezone }); 135 | } 136 | } 137 | 138 | None 139 | } 140 | 141 | fn parse_timezone(timezone_name: &str) -> Option { 142 | match timezone_name.to_lowercase().as_str() { 143 | "cst" | "cdt" => Some(Tz::CST6CDT), 144 | "est" | "edt" => Some(Tz::EST5EDT), 145 | _ => Tz::from_str_insensitive(timezone_name) 146 | .ok() 147 | .or_else(|| Tz::from_str_insensitive(&format!("etc/{timezone_name}")).ok()), 148 | } 149 | } 150 | 151 | fn timezone_to_string(tz: Tz) -> String { 152 | match tz { 153 | Tz::CST6CDT => "CST".to_string(), 154 | Tz::EST5EDT => "EST".to_string(), 155 | _ => { 156 | let tz_string = tz.name(); 157 | if let Some(tz_string) = tz_string.strip_prefix("Etc/") { 158 | tz_string.to_string() 159 | } else { 160 | tz_string.to_string() 161 | } 162 | } 163 | } 164 | } 165 | 166 | #[cfg(test)] 167 | mod tests { 168 | use super::*; 169 | 170 | #[test] 171 | fn test_evaluate() { 172 | let response = evaluate("9 pm est to CST").unwrap(); 173 | let TimeResponse::Conversion { 174 | 
source_time, 175 | target_time, 176 | .. 177 | } = response 178 | else { 179 | panic!("Expected TimeResponse::Conversion, got {response:?}"); 180 | }; 181 | 182 | // we don't check the exact offsets since it depends on daylight savings, cst 183 | // will always be 1 hour behind est though 184 | 185 | assert_eq!(source_time.format("%-I:%M %P").to_string(), "9:00 pm"); 186 | assert_eq!(target_time.format("%-I:%M %P").to_string(), "8:00 pm"); 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /src/engines/search/bing.rs: -------------------------------------------------------------------------------- 1 | use base64::Engine; 2 | use eyre::eyre; 3 | use rand::Rng; 4 | use scraper::{ElementRef, Html, Selector}; 5 | use tracing::warn; 6 | use url::Url; 7 | 8 | use crate::{ 9 | engines::{EngineImageResult, EngineImagesResponse, EngineResponse, CLIENT}, 10 | parse::{parse_html_response_with_opts, ParseOpts, QueryMethod}, 11 | }; 12 | 13 | pub async fn request(query: &str) -> wreq::RequestBuilder { 14 | let cvid = generate_cvid(); 15 | let url = Url::parse_with_params( 16 | "https://www.bing.com/search", 17 | &[ 18 | ("q", query), 19 | ("pq", query), 20 | ("cvid", &cvid), 21 | ("filters", "rcrse:\"1\""), // filters=rcrse:"1" makes it not try to autocorrect 22 | ("FORM", "PERE"), 23 | ("ghc", "1"), 24 | ("lq", "0"), 25 | ("qs", "n"), 26 | ("sk", ""), 27 | ("sp", "-1"), 28 | ], 29 | ) 30 | .unwrap(); 31 | CLIENT 32 | .get(url) 33 | .header("Cookie", &format!("SRCHHPGUSR=IG={}", cvid)) 34 | } 35 | 36 | fn generate_cvid() -> String { 37 | let mut bytes = [0u8; 16]; 38 | rand::rng().fill(&mut bytes); 39 | bytes.iter().map(|b| format!("{:02X}", b)).collect() 40 | } 41 | 42 | pub fn parse_response(body: &str) -> eyre::Result { 43 | parse_html_response_with_opts( 44 | body, 45 | ParseOpts::new() 46 | .result("#b_results > li.b_algo") 47 | .title(".b_algo h2 > a") 48 | .href(QueryMethod::Manual(Box::new(|el: &ElementRef| { 49 | let 
url = el 50 | .select(&Selector::parse("a[href]").unwrap()) 51 | .next() 52 | .and_then(|n| n.value().attr("href")) 53 | .unwrap_or_default(); 54 | clean_url(url) 55 | }))) 56 | .description(QueryMethod::Manual(Box::new(|el: &ElementRef| { 57 | let mut description = String::new(); 58 | for inner_node in el 59 | .select( 60 | &Selector::parse(".b_caption > p, p.b_algoSlug, .b_caption .ipText") 61 | .unwrap(), 62 | ) 63 | .next() 64 | .map(|n| n.children().collect::>()) 65 | .unwrap_or_default() 66 | { 67 | match inner_node.value() { 68 | scraper::Node::Text(t) => { 69 | description.push_str(&t.text); 70 | } 71 | scraper::Node::Element(inner_el) => { 72 | if !inner_el 73 | .has_class("algoSlug_icon", scraper::CaseSensitivity::CaseSensitive) 74 | { 75 | let element_ref = ElementRef::wrap(inner_node).unwrap(); 76 | description.push_str(&element_ref.text().collect::()); 77 | } 78 | } 79 | _ => {} 80 | } 81 | } 82 | 83 | Ok(description) 84 | }))), 85 | ) 86 | } 87 | 88 | pub fn request_images(query: &str) -> wreq::RequestBuilder { 89 | CLIENT.get( 90 | Url::parse_with_params( 91 | "https://www.bing.com/images/async", 92 | &[ 93 | ("q", query), 94 | ("async", "content"), 95 | ("first", "1"), 96 | ("count", "35"), 97 | ], 98 | ) 99 | .unwrap(), 100 | ) 101 | } 102 | 103 | #[tracing::instrument(skip(body))] 104 | pub fn parse_images_response(body: &str) -> eyre::Result { 105 | let dom = Html::parse_document(body); 106 | 107 | let mut image_results = Vec::new(); 108 | 109 | let image_container_el_sel = Selector::parse(".imgpt").unwrap(); 110 | let image_el_sel = Selector::parse(".iusc").unwrap(); 111 | for image_container_el in dom.select(&image_container_el_sel) { 112 | let image_el = image_container_el 113 | .select(&image_el_sel) 114 | .next() 115 | .ok_or_else(|| eyre!("no image element found"))?; 116 | 117 | // parse the "m" attribute as json 118 | let Some(data) = image_el.value().attr("m") else { 119 | // this is normal, i think 120 | continue; 121 | }; 122 | let data 
= serde_json::from_str::(data)?; 123 | let page_url = data 124 | .get("purl") 125 | .and_then(|v| v.as_str()) 126 | .unwrap_or_default(); 127 | let image_url = data 128 | // short for media url, probably 129 | .get("murl") 130 | .and_then(|v| v.as_str()) 131 | .unwrap_or_default(); 132 | let page_title = data 133 | .get("t") 134 | .and_then(|v| v.as_str()) 135 | .unwrap_or_default() 136 | // bing adds these unicode characters around matches 137 | .replace(['', ''], ""); 138 | 139 | // the text looks like "1200 x 1600 · jpegWikipedia" 140 | // (the last part is incorrectly parsed since the actual text is inside another 141 | // element but this is already good enough for our purposes) 142 | let text = image_container_el.text().collect::(); 143 | let width_height: Vec = text 144 | .split(" · ") 145 | .next() 146 | .unwrap_or_default() 147 | .split(" x ") 148 | .map(|s| s.parse().unwrap_or_default()) 149 | .collect(); 150 | let (width, height) = match width_height.as_slice() { 151 | [width, height] => (*width, *height), 152 | _ => { 153 | warn!("couldn't get width and height from text \"{text}\""); 154 | continue; 155 | } 156 | }; 157 | 158 | image_results.push(EngineImageResult { 159 | page_url: page_url.to_string(), 160 | image_url: image_url.to_string(), 161 | title: page_title.to_string(), 162 | width, 163 | height, 164 | }); 165 | } 166 | 167 | Ok(EngineImagesResponse { image_results }) 168 | } 169 | 170 | fn clean_url(url: &str) -> eyre::Result { 171 | // clean up bing's tracking urls 172 | if url.starts_with("https://www.bing.com/ck/a?") { 173 | // get the u param 174 | let url = Url::parse(url)?; 175 | let u = url 176 | .query_pairs() 177 | .find(|(key, _)| key == "u") 178 | .unwrap_or_default() 179 | .1; 180 | // cut off the "a1" and base64 decode 181 | let u = base64::engine::general_purpose::URL_SAFE_NO_PAD 182 | .decode(&u[2..]) 183 | .unwrap_or_default(); 184 | // convert to utf8 185 | Ok(String::from_utf8_lossy(&u).to_string()) 186 | } else { 187 | 
Ok(url.to_string()) 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /src/parse.rs: -------------------------------------------------------------------------------- 1 | //! Helper functions for parsing search engine responses. 2 | 3 | use crate::{ 4 | engines::{EngineFeaturedSnippet, EngineResponse, EngineSearchResult}, 5 | urls::normalize_url, 6 | }; 7 | 8 | use scraper::{Html, Selector}; 9 | use tracing::trace; 10 | 11 | #[derive(Default)] 12 | pub struct ParseOpts { 13 | result: &'static str, 14 | title: QueryMethod, 15 | href: QueryMethod, 16 | description: QueryMethod, 17 | 18 | featured_snippet: &'static str, 19 | featured_snippet_title: QueryMethod, 20 | featured_snippet_href: QueryMethod, 21 | featured_snippet_description: QueryMethod, 22 | } 23 | 24 | impl ParseOpts { 25 | #[must_use] 26 | pub fn new() -> Self { 27 | Self::default() 28 | } 29 | 30 | #[must_use] 31 | pub fn result(mut self, result: &'static str) -> Self { 32 | self.result = result; 33 | self 34 | } 35 | 36 | #[must_use] 37 | pub fn title(mut self, title: impl Into) -> Self { 38 | self.title = title.into(); 39 | self 40 | } 41 | 42 | #[must_use] 43 | pub fn href(mut self, href: impl Into) -> Self { 44 | self.href = href.into(); 45 | self 46 | } 47 | 48 | #[must_use] 49 | pub fn description(mut self, description: impl Into) -> Self { 50 | self.description = description.into(); 51 | self 52 | } 53 | 54 | #[must_use] 55 | pub fn featured_snippet(mut self, featured_snippet: &'static str) -> Self { 56 | self.featured_snippet = featured_snippet; 57 | self 58 | } 59 | 60 | #[must_use] 61 | pub fn featured_snippet_title( 62 | mut self, 63 | featured_snippet_title: impl Into, 64 | ) -> Self { 65 | self.featured_snippet_title = featured_snippet_title.into(); 66 | self 67 | } 68 | 69 | #[must_use] 70 | pub fn featured_snippet_href(mut self, featured_snippet_href: impl Into) -> Self { 71 | self.featured_snippet_href = featured_snippet_href.into(); 72 
| self 73 | } 74 | 75 | #[must_use] 76 | pub fn featured_snippet_description( 77 | mut self, 78 | featured_snippet_description: impl Into, 79 | ) -> Self { 80 | self.featured_snippet_description = featured_snippet_description.into(); 81 | self 82 | } 83 | } 84 | 85 | type ManualQueryMethod = Box eyre::Result>; 86 | 87 | #[derive(Default)] 88 | pub enum QueryMethod { 89 | #[default] 90 | None, 91 | CssSelector(&'static str), 92 | Manual(ManualQueryMethod), 93 | } 94 | 95 | impl From<&'static str> for QueryMethod { 96 | fn from(s: &'static str) -> Self { 97 | QueryMethod::CssSelector(s) 98 | } 99 | } 100 | 101 | impl QueryMethod { 102 | pub fn call_with_css_selector_override( 103 | &self, 104 | el: &scraper::ElementRef, 105 | with_css_selector: impl Fn(&scraper::ElementRef, &'static str) -> Option, 106 | ) -> eyre::Result { 107 | match self { 108 | QueryMethod::None => Ok(String::new()), 109 | QueryMethod::CssSelector(s) => Ok(with_css_selector(el, s).unwrap_or_default()), 110 | QueryMethod::Manual(f) => f(el), 111 | } 112 | } 113 | 114 | pub fn call(&self, el: &scraper::ElementRef) -> eyre::Result { 115 | self.call_with_css_selector_override(el, |el, s| { 116 | el.select(&Selector::parse(s).unwrap()) 117 | .next() 118 | .map(|n| n.text().collect::()) 119 | }) 120 | } 121 | } 122 | 123 | pub(super) fn parse_html_response_with_opts( 124 | body: &str, 125 | opts: ParseOpts, 126 | ) -> eyre::Result { 127 | let dom = Html::parse_document(body); 128 | 129 | let mut search_results = Vec::new(); 130 | 131 | let ParseOpts { 132 | result: result_item_query, 133 | title: title_query_method, 134 | href: href_query_method, 135 | description: description_query_method, 136 | featured_snippet: featured_snippet_query, 137 | featured_snippet_title: featured_snippet_title_query_method, 138 | featured_snippet_href: featured_snippet_href_query_method, 139 | featured_snippet_description: featured_snippet_description_query_method, 140 | } = opts; 141 | 142 | let result_item_query = 
Selector::parse(result_item_query).unwrap(); 143 | 144 | let results = dom.select(&result_item_query); 145 | 146 | for result in results { 147 | let title = title_query_method.call(&result)?; 148 | let url = href_query_method.call_with_css_selector_override(&result, |el, s| { 149 | el.select(&Selector::parse(s).unwrap()).next().map(|n| { 150 | n.value() 151 | .attr("href") 152 | .map_or_else(|| n.text().collect::(), str::to_string) 153 | }) 154 | })?; 155 | let description = description_query_method.call(&result)?; 156 | trace!("url: {url}, title: {title}, description: {description}"); 157 | trace!("result: {:?}", result.value().classes().collect::>()); 158 | 159 | // this can happen on google if you search "roll d6" 160 | let is_empty = description.is_empty() && title.is_empty(); 161 | if is_empty { 162 | trace!("empty content for {url} ({title}), skipping"); 163 | continue; 164 | } 165 | 166 | // this can happen on google if it gives you a featured snippet 167 | if description.is_empty() { 168 | trace!("empty description for {url} ({title}), skipping"); 169 | continue; 170 | } 171 | 172 | let url = normalize_url(&url); 173 | 174 | search_results.push(EngineSearchResult { 175 | url, 176 | title, 177 | description, 178 | }); 179 | } 180 | 181 | let featured_snippet = if featured_snippet_query.is_empty() { 182 | None 183 | } else if let Some(featured_snippet) = dom 184 | .select(&Selector::parse(featured_snippet_query).unwrap()) 185 | .next() 186 | { 187 | let title = featured_snippet_title_query_method.call(&featured_snippet)?; 188 | let url = featured_snippet_href_query_method.call(&featured_snippet)?; 189 | let url = normalize_url(&url); 190 | let description = featured_snippet_description_query_method.call(&featured_snippet)?; 191 | 192 | // this can happen on google if you search "what's my user agent" 193 | let is_empty = description.is_empty() && title.is_empty(); 194 | if is_empty { 195 | None 196 | } else { 197 | Some(EngineFeaturedSnippet { 198 | url, 199 
| title, 200 | description, 201 | }) 202 | } 203 | } else { 204 | None 205 | }; 206 | 207 | Ok(EngineResponse { 208 | search_results, 209 | featured_snippet, 210 | // these fields are used by instant answers, not normal search engines 211 | answer_html: None, 212 | infobox_html: None, 213 | }) 214 | } 215 | -------------------------------------------------------------------------------- /src/engines/answer/fend.rs: -------------------------------------------------------------------------------- 1 | use fend_core::SpanKind; 2 | use maud::{html, PreEscaped}; 3 | use std::sync::{atomic::AtomicU32, atomic::Ordering, LazyLock}; 4 | 5 | use crate::engines::EngineResponse; 6 | 7 | use super::regex; 8 | 9 | pub async fn request(query: &str) -> EngineResponse { 10 | let query = clean_query(query); 11 | 12 | let Some(result_html) = evaluate_to_html(&query, true) else { 13 | return EngineResponse::new(); 14 | }; 15 | 16 | EngineResponse::answer_html(html! { 17 | p.answer-query { (query) " =" } 18 | h3 { b { (result_html) } } 19 | }) 20 | } 21 | 22 | pub fn request_autocomplete(query: &str) -> Vec { 23 | let mut results = Vec::new(); 24 | 25 | let query = clean_query(query); 26 | 27 | if let Some(result) = evaluate_to_plaintext(&query, false) { 28 | results.push(format!("= {result}")); 29 | } 30 | 31 | results 32 | } 33 | 34 | fn clean_query(query: &str) -> String { 35 | query.strip_suffix('=').unwrap_or(query).trim().to_string() 36 | } 37 | 38 | #[derive(Debug)] 39 | pub struct Span { 40 | pub text: String, 41 | pub kind: SpanKind, 42 | } 43 | 44 | fn evaluate_to_plaintext(query: &str, html: bool) -> Option { 45 | let spans = evaluate_into_spans(query, html); 46 | if spans.is_empty() { 47 | return None; 48 | } 49 | 50 | Some( 51 | spans 52 | .iter() 53 | .map(|span| span.text.clone()) 54 | .collect::(), 55 | ) 56 | } 57 | 58 | fn evaluate_to_html(query: &str, html: bool) -> Option> { 59 | let spans = evaluate_into_spans(query, html); 60 | if spans.is_empty() { 61 | return 
None; 62 | } 63 | 64 | let mut result_html = String::new(); 65 | for span in &spans { 66 | let class = match span.kind { 67 | fend_core::SpanKind::Number 68 | | fend_core::SpanKind::Boolean 69 | | fend_core::SpanKind::Date => "answer-calc-constant", 70 | fend_core::SpanKind::String => "answer-calc-string", 71 | _ => "", 72 | }; 73 | if class.is_empty() { 74 | result_html.push_str(&html! { (span.text) }.into_string()); 75 | } else { 76 | result_html.push_str( 77 | &html! { 78 | span.(class) { 79 | (span.text) 80 | } 81 | } 82 | .into_string(), 83 | ); 84 | } 85 | } 86 | 87 | // if the result was a single hex number then we add the decimal equivalent 88 | // below 89 | if spans.len() == 1 90 | && spans[0].kind == fend_core::SpanKind::Number 91 | && spans[0].text.starts_with("0x") 92 | { 93 | let hex = spans[0].text.trim_start_matches("0x"); 94 | if let Ok(num) = u64::from_str_radix(hex, 16) { 95 | result_html.push_str( 96 | &html! { 97 | span.answer-comment { " = " (num) } 98 | } 99 | .into_string(), 100 | ); 101 | } 102 | } 103 | 104 | Some(PreEscaped(result_html)) 105 | } 106 | 107 | pub static FEND_CTX: LazyLock = LazyLock::new(|| { 108 | let mut context = fend_core::Context::new(); 109 | 110 | // make lowercase f and c work 111 | context.define_custom_unit_v1("f", "f", "°F", &fend_core::CustomUnitAttribute::Alias); 112 | context.define_custom_unit_v1("c", "c", "°C", &fend_core::CustomUnitAttribute::Alias); 113 | 114 | context.define_custom_unit_v1( 115 | "mb", 116 | "mbs", 117 | "megabyte", 118 | &fend_core::CustomUnitAttribute::Alias, 119 | ); 120 | context.define_custom_unit_v1( 121 | "gb", 122 | "gbs", 123 | "gigabyte", 124 | &fend_core::CustomUnitAttribute::Alias, 125 | ); 126 | context.define_custom_unit_v1( 127 | "tb", 128 | "tbs", 129 | "terabyte", 130 | &fend_core::CustomUnitAttribute::Alias, 131 | ); 132 | context.define_custom_unit_v1( 133 | "pb", 134 | "pbs", 135 | "petabyte", 136 | &fend_core::CustomUnitAttribute::Alias, 137 | ); 138 | 139 | // make 
random work 140 | context.set_random_u32_fn(rand::random::); 141 | 142 | fend_core::evaluate("ord=(x: x to codepoint)", &mut context).unwrap(); 143 | fend_core::evaluate("chr=(x: x to character)", &mut context).unwrap(); 144 | 145 | context 146 | }); 147 | 148 | struct Interrupter { 149 | invocations_left: AtomicU32, 150 | } 151 | 152 | impl fend_core::Interrupt for Interrupter { 153 | fn should_interrupt(&self) -> bool { 154 | let v = self.invocations_left.load(Ordering::Relaxed); 155 | 156 | if v == 0 { 157 | return true; 158 | } 159 | 160 | self.invocations_left.store(v - 1, Ordering::Relaxed); 161 | false 162 | } 163 | } 164 | 165 | fn evaluate_into_spans(query: &str, multiline: bool) -> Vec { 166 | // fend incorrectly triggers on these often 167 | { 168 | // at least 3 characters and not one of the short constants 169 | if query.len() < 3 && !matches!(query.to_lowercase().as_str(), "pi" | "e" | "c") { 170 | return vec![]; 171 | } 172 | 173 | // probably a query operator thing or a url, fend evaluates these but it 174 | // shouldn't 175 | if regex!("^[a-z]{2,}:").is_match(query) { 176 | return vec![]; 177 | } 178 | 179 | // if it starts and ends with quotes then the person was just searching in 180 | // quotes and didn't mean to evaluate a string 181 | if query.starts_with('"') 182 | && query.ends_with('"') 183 | && query.chars().filter(|c| *c == '"').count() == 2 184 | { 185 | return vec![]; 186 | } 187 | } 188 | 189 | let mut context = FEND_CTX.clone(); 190 | if multiline { 191 | // this makes it generate slightly nicer outputs for some queries like 2d6 192 | context.set_output_mode_terminal(); 193 | } 194 | 195 | // avoids stackoverflows and queries that take too long 196 | // examples: 197 | // - Y = (\f. (\x. f x x)) (\x. 
f x x); Y(Y) 198 | // - 10**100000000 199 | let interrupt = Interrupter { 200 | invocations_left: AtomicU32::new(1000), 201 | }; 202 | let Ok(result) = fend_core::evaluate_with_interrupt(query, &mut context, &interrupt) else { 203 | return vec![]; 204 | }; 205 | let main_result = result.get_main_result(); 206 | if main_result == query { 207 | return vec![]; 208 | } 209 | 210 | let res = result 211 | .get_main_result_spans() 212 | .filter(|span| !span.string().is_empty()) 213 | .map(|span| Span { 214 | text: span.string().to_string(), 215 | kind: span.kind(), 216 | }) 217 | .collect::>(); 218 | 219 | if let Some(first) = res.first() { 220 | if first.kind == SpanKind::Other && first.text.starts_with("\\") { 221 | // false positive, can happen if you search like "a: b" 222 | return vec![]; 223 | } 224 | } 225 | 226 | res 227 | } 228 | -------------------------------------------------------------------------------- /src/engines/answer/numbat.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashSet, sync::LazyLock}; 2 | 3 | use fend_core::SpanKind; 4 | use maud::{html, PreEscaped}; 5 | use numbat::{ 6 | markup::{FormatType, FormattedString, Markup}, 7 | pretty_print::PrettyPrint, 8 | resolver::CodeSource, 9 | InterpreterResult, InterpreterSettings, Statement, 10 | }; 11 | use tracing::debug; 12 | 13 | use crate::engines::EngineResponse; 14 | 15 | pub async fn request(query: &str) -> EngineResponse { 16 | let query = clean_query(query); 17 | 18 | let Some(NumbatResponse { 19 | query_html, 20 | result_html, 21 | }) = evaluate(&query) 22 | else { 23 | return EngineResponse::new(); 24 | }; 25 | 26 | EngineResponse::answer_html(html! 
/// Heuristic filter that decides whether a search query is plausibly meant
/// for the numbat calculator, so we don't evaluate ordinary searches.
///
/// Accepts the short constants "pi", "e", and "c" unconditionally; otherwise
/// requires at least 3 characters, at least one digit, and rejects queries
/// that are a single double-quoted string (the user was quoting a search
/// term, not asking to evaluate a string literal).
fn is_potential_request(query: &str) -> bool {
    // Short mathematical constants are always allowed through.
    let lowered = query.to_lowercase();
    if lowered == "pi" || lowered == "e" || lowered == "c" {
        return true;
    }

    let long_enough = query.len() >= 3;
    let has_digit = query.chars().any(char::is_numeric);

    // A query wrapped in exactly one pair of quotes is a quoted search.
    let quote_count = query.chars().filter(|&c| c == '"').count();
    let fully_quoted = query.starts_with('"') && query.ends_with('"') && quote_count == 2;

    long_enough && has_digit && !fully_quoted
}
| return None; 109 | } 110 | let res_markup = fix_markup(res_markup); 111 | 112 | Some((statements.into_iter().next_back()?, res_markup)) 113 | } 114 | 115 | fn evaluate_for_autocomplete(query: &str) -> Option { 116 | let (_statements, res_markup) = interpret(query)?; 117 | 118 | Some(res_markup.to_string().trim().to_string()) 119 | } 120 | 121 | pub struct NumbatResponse { 122 | pub query_html: PreEscaped, 123 | pub result_html: PreEscaped, 124 | } 125 | 126 | fn evaluate(query: &str) -> Option { 127 | let (statement, res_markup) = interpret(query)?; 128 | 129 | let statement_markup = fix_markup(statement.pretty_print()); 130 | let query_html = markup_to_html(statement_markup); 131 | let result_html = markup_to_html(res_markup); 132 | 133 | Some(NumbatResponse { 134 | query_html, 135 | result_html, 136 | }) 137 | } 138 | 139 | fn fix_markup(markup: Markup) -> Markup { 140 | let mut reordered_markup: Vec = Vec::new(); 141 | const LEFT_SIDE_UNITS: &[&str] = &["$", "€", "£", "¥"]; 142 | for s in markup.0 { 143 | let FormattedString(_output_type, format_type, content) = s.clone(); 144 | 145 | if format_type == FormatType::Unit && LEFT_SIDE_UNITS.contains(&&*content) { 146 | // remove the last markup if it's whitespace 147 | if let Some(FormattedString(_, FormatType::Whitespace, _)) = reordered_markup.last() { 148 | reordered_markup.pop(); 149 | } 150 | reordered_markup.insert(reordered_markup.len() - 1, s); 151 | } else { 152 | reordered_markup.push(s); 153 | } 154 | } 155 | Markup(reordered_markup) 156 | } 157 | 158 | fn markup_to_html(markup: Markup) -> PreEscaped { 159 | let mut html = String::new(); 160 | for FormattedString(_, format_type, content) in markup.0 { 161 | let class = match format_type { 162 | FormatType::Value => "answer-calc-constant", 163 | FormatType::String => "answer-calc-string", 164 | FormatType::Identifier => "answer-calc-func", 165 | _ => "", 166 | }; 167 | if class.is_empty() { 168 | html.push_str(&html! 
{(content)}.into_string()); 169 | } else { 170 | html.push_str( 171 | &html! { 172 | span.(class) { (content) } 173 | } 174 | .into_string(), 175 | ); 176 | } 177 | } 178 | PreEscaped(html) 179 | } 180 | 181 | pub static NUMBAT_CTX: LazyLock = LazyLock::new(|| { 182 | let mut ctx = numbat::Context::new(numbat::module_importer::BuiltinModuleImporter {}); 183 | let _ = ctx.interpret("use prelude", CodeSource::Internal); 184 | let _ = ctx.interpret("use units::currencies", CodeSource::Internal); 185 | 186 | ctx.load_currency_module_on_demand(true); 187 | 188 | // a few hardcoded aliases 189 | // (the lowercase alias code won't work for these because they have prefixes) 190 | for (alias, canonical) in &[ 191 | ("kb", "kB"), 192 | ("kib", "KiB"), 193 | ("mb", "MB"), 194 | ("mib", "MiB"), 195 | ("gb", "GB"), 196 | ("gib", "GiB"), 197 | ("tb", "TB"), 198 | ("tib", "TiB"), 199 | ("pb", "PB"), 200 | ("pib", "PiB"), 201 | ] { 202 | let _ = ctx.interpret(&format!("let {alias} = {canonical}"), CodeSource::Internal); 203 | } 204 | 205 | // lowercase aliases (so for example usd and USD are the same unit) 206 | 207 | let mut unit_names = HashSet::new(); 208 | for names in ctx.unit_names() { 209 | unit_names.extend(names.iter().map(|name| name.to_owned())); 210 | } 211 | 212 | for name in &unit_names { 213 | // taken_unit_names.insert(alias_name); 214 | let name_lower = name.to_lowercase(); 215 | // add every lowercase aliases for every unit as long as that alias isn't 216 | // already taken 217 | if !unit_names.contains(&name_lower) { 218 | let _ = ctx.interpret(&format!("let {name_lower} = {name}"), CodeSource::Internal); 219 | } 220 | } 221 | 222 | ctx 223 | }); 224 | -------------------------------------------------------------------------------- /src/web/assets/script.js: -------------------------------------------------------------------------------- 1 | const searchInputEl = document.getElementById("search-input"); 2 | 3 | if (searchInputEl) { 4 | // add an element with 
// Replaces the contents of the suggestions dropdown with one clickable row
// per autocomplete option, hiding the dropdown when there is nothing to show.
function renderSuggestions(options) {
  // nothing to suggest — hide the dropdown entirely
  if (!options.length) {
    suggestionsEl.style.visibility = "hidden";
    return;
  }

  suggestionsEl.style.visibility = "visible";
  suggestionsEl.innerHTML = "";

  for (const option of options) {
    const optionEl = document.createElement("div");
    optionEl.textContent = option;
    optionEl.className = "search-input-suggestion";
    suggestionsEl.appendChild(optionEl);

    // mousedown (rather than click) so this fires before the input's blur
    // handler hides the dropdown
    optionEl.addEventListener("mousedown", () => {
      searchInputEl.value = option;
      searchInputEl.focus();
      searchInputEl.form.submit();
    });
  }
}
| focusedSuggestionEl = null; 75 | focusedSuggestionIndex = -1; 76 | } 77 | } 78 | 79 | function focusSelectionIndex(index) { 80 | clearFocusedSuggestion(); 81 | focusedSuggestionIndex = index; 82 | focusedSuggestionEl = suggestionsEl.children[focusedSuggestionIndex]; 83 | focusedSuggestionEl.classList.add("focused"); 84 | searchInputEl.value = focusedSuggestionEl.textContent; 85 | } 86 | 87 | document.addEventListener("keydown", (e) => { 88 | // if any modifier keys are pressed, ignore all this 89 | if (e.ctrlKey || e.metaKey || e.altKey || e.shiftKey) { 90 | return; 91 | } 92 | 93 | // if it's focused then use different keybinds 94 | if (searchInputEl.matches(":focus")) { 95 | if (e.key === "ArrowDown") { 96 | e.preventDefault(); 97 | if (focusedSuggestionIndex === -1) { 98 | focusSelectionIndex(0); 99 | } else if (focusedSuggestionIndex < suggestionsEl.children.length - 1) { 100 | focusSelectionIndex(focusedSuggestionIndex + 1); 101 | } else { 102 | focusSelectionIndex(0); 103 | } 104 | } else if (e.key === "ArrowUp") { 105 | e.preventDefault(); 106 | if (focusedSuggestionIndex === -1) { 107 | focusSelectionIndex(suggestionsEl.children.length - 1); 108 | } else if (focusedSuggestionIndex > 0) { 109 | focusSelectionIndex(focusedSuggestionIndex - 1); 110 | } else { 111 | focusSelectionIndex(suggestionsEl.children.length - 1); 112 | } 113 | } else if (e.key === "Escape") { 114 | clearFocusedSuggestion(); 115 | suggestionsEl.style.visibility = "hidden"; 116 | } 117 | 118 | return; 119 | } 120 | 121 | // if the currently selected element is not the search bar and is contenteditable, don't do anything 122 | const focusedEl = document.querySelector(":focus"); 123 | if ( 124 | focusedEl && 125 | (focusedEl.tagName.toLowerCase() == "input" || 126 | focusedEl.tagName.toLowerCase() == "textarea" || 127 | focusedEl.getAttribute("contenteditable") !== null) 128 | ) 129 | return; 130 | 131 | // if the user starts typing but they don't have focus on the input, focus it 132 | 
133 | // must be a letter or number 134 | if (e.key.match(/^[a-z0-9]$/i)) { 135 | searchInputEl.focus(); 136 | } 137 | // right arrow key focuses it at the end 138 | else if (e.key === "ArrowRight") { 139 | searchInputEl.focus(); 140 | searchInputEl.setSelectionRange( 141 | searchInputEl.value.length, 142 | searchInputEl.value.length 143 | ); 144 | } 145 | // left arrow key focuses it at the beginning 146 | else if (e.key === "ArrowLeft") { 147 | searchInputEl.focus(); 148 | searchInputEl.setSelectionRange(0, 0); 149 | } 150 | // backspace key focuses it at the end 151 | else if (e.key === "Backspace") { 152 | searchInputEl.focus(); 153 | searchInputEl.setSelectionRange( 154 | searchInputEl.value.length, 155 | searchInputEl.value.length 156 | ); 157 | } 158 | }); 159 | 160 | // update the input suggestions on input 161 | searchInputEl.addEventListener("input", () => { 162 | clearFocusedSuggestion(); 163 | updateSuggestions(); 164 | }); 165 | // and when they click suggestions 166 | searchInputEl.addEventListener("click", updateSuggestions); 167 | // on unfocus hide the suggestions 168 | searchInputEl.addEventListener("blur", (e) => { 169 | suggestionsEl.style.visibility = "hidden"; 170 | }); 171 | } 172 | 173 | const customCssEl = document.getElementById("custom-css"); 174 | if (customCssEl) { 175 | // tab to indent 176 | // https://stackoverflow.com/a/6637396 177 | customCssEl.addEventListener("keydown", (e) => { 178 | if (e.key == "Tab") { 179 | e.preventDefault(); 180 | var start = customCssEl.selectionStart; 181 | var end = customCssEl.selectionEnd; 182 | customCssEl.value = 183 | customCssEl.value.substring(0, start) + 184 | "\t" + 185 | customCssEl.value.substring(end); 186 | customCssEl.selectionStart = customCssEl.selectionEnd = start + 1; 187 | } 188 | }); 189 | 190 | // ctrl+enter anywhere on the page to submit 191 | const saveEl = document.getElementById("save-settings-button"); 192 | document.addEventListener("keydown", (e) => { 193 | if (e.key == 
"Enter" && (e.ctrlKey || e.metaKey)) { 194 | e.preventDefault(); 195 | console.log("click"); 196 | saveEl.click(); 197 | } 198 | }); 199 | 200 | // save whether the details are open or not 201 | const customCssDetailsEl = document.getElementById("custom-css-details"); 202 | const customCssDetailsOpen = localStorage.getItem("custom-css-details-open"); 203 | if (customCssDetailsOpen === "true") customCssDetailsEl.open = true; 204 | customCssDetailsEl.addEventListener("toggle", () => { 205 | localStorage.setItem("custom-css-details-open", customCssDetailsEl.open); 206 | }); 207 | } 208 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. 
rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. 
Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /src/engines/answer/thesaurus.rs: -------------------------------------------------------------------------------- 1 | use eyre::eyre; 2 | use maud::{html, PreEscaped}; 3 | use scraper::{Html, Selector}; 4 | use serde::Deserialize; 5 | use tracing::error; 6 | use url::Url; 7 | 8 | use crate::engines::{EngineResponse, RequestResponse, CLIENT}; 9 | 10 | use super::regex; 11 | 12 | pub async fn request(query: &str) -> RequestResponse { 13 | let re = regex!(r"^synonym(?:s?) 
for\s+(\w+)$"); 14 | let re2 = regex!(r"^(\w+)\s+synonym(?:s?)$"); 15 | let Some(query) = re 16 | .captures(query) 17 | .and_then(|m| m.get(1)) 18 | .or_else(|| re2.captures(query).and_then(|m| m.get(1))) 19 | else { 20 | return RequestResponse::None; 21 | }; 22 | let query = query.as_str().to_lowercase(); 23 | 24 | CLIENT 25 | .get( 26 | Url::parse( 27 | format!( 28 | "https://www.thesaurus.com/browse/{}", 29 | urlencoding::encode(&query.to_lowercase()) 30 | ) 31 | .as_str(), 32 | ) 33 | .unwrap(), 34 | ) 35 | .into() 36 | } 37 | 38 | #[derive(Debug, Deserialize)] 39 | pub struct ThesaurusResponse { 40 | /// Example: `silly` 41 | pub word: String, 42 | pub items: Vec, 43 | } 44 | 45 | #[derive(Debug, Deserialize)] 46 | pub struct ThesaurusItem { 47 | /// Example `adjective` 48 | pub part_of_speech: String, 49 | /// Example: `absurd, giddy, foolish` 50 | pub as_in: String, 51 | 52 | pub strongest_matches: Vec, 53 | pub strong_matches: Vec, 54 | pub weak_matches: Vec, 55 | } 56 | 57 | pub fn parse_response(body: &str) -> eyre::Result { 58 | let response = parse_thesaurus_com_response(body)?; 59 | 60 | if response.items.is_empty() { 61 | return Ok(EngineResponse::new()); 62 | } 63 | 64 | let rendered_html = render_thesaurus_html(response); 65 | 66 | Ok(EngineResponse::answer_html(rendered_html)) 67 | } 68 | 69 | fn parse_thesaurus_com_response(body: &str) -> eyre::Result { 70 | let dom = Html::parse_document(body); 71 | 72 | let word = dom 73 | .select(&Selector::parse("h1").unwrap()) 74 | .next() 75 | .ok_or_else(|| eyre!("No title found"))? 
76 | .text() 77 | .collect::(); 78 | 79 | let card_sel = Selector::parse("[data-type='synonym-and-antonym-card']").unwrap(); 80 | let card_els = dom.select(&card_sel); 81 | 82 | let mut items = Vec::::new(); 83 | 84 | for synonym_and_antonym_card_el in card_els { 85 | items.push(parse_thesaurus_com_item(synonym_and_antonym_card_el)?); 86 | } 87 | 88 | Ok(ThesaurusResponse { word, items }) 89 | } 90 | 91 | fn parse_thesaurus_com_item( 92 | synonym_and_antonym_card_el: scraper::ElementRef, 93 | ) -> eyre::Result { 94 | let adjective_as_in_words = synonym_and_antonym_card_el 95 | .select(&Selector::parse("div:first-child > p").unwrap()) 96 | .next() 97 | .ok_or_else(|| eyre!("No adjective as in words found"))? 98 | .text() 99 | .collect::(); 100 | let (part_of_speech, as_in) = adjective_as_in_words 101 | .split_once(" as in ") 102 | .ok_or_else(|| eyre!("No 'as in' found"))?; 103 | let part_of_speech = part_of_speech.trim().to_owned(); 104 | let as_in = as_in.trim().to_owned(); 105 | 106 | let matches_container_el = synonym_and_antonym_card_el 107 | .select(&Selector::parse("div:nth-child(2) > div:nth-child(2)").unwrap()) 108 | .next() 109 | .ok_or_else(|| eyre!("No matches container found"))?; 110 | 111 | let mut strongest_matches = Vec::::new(); 112 | let mut strong_matches = Vec::::new(); 113 | let mut weak_matches = Vec::::new(); 114 | 115 | for match_el in matches_container_el.select(&Selector::parse("div").unwrap()) { 116 | let match_type = match_el 117 | .select(&Selector::parse("p").unwrap()) 118 | .next() 119 | .ok_or_else(|| eyre!("No match type found"))? 
120 | .text() 121 | .collect::(); 122 | let match_type = match_type 123 | .split(' ') 124 | .next() 125 | .ok_or_else(|| eyre!("No match type found"))?; 126 | 127 | let matches = match_el 128 | .select(&Selector::parse("a").unwrap()) 129 | .map(|el| el.text().collect::()) 130 | .collect::>(); 131 | 132 | match match_type { 133 | "Strongest" => { 134 | strongest_matches = matches; 135 | } 136 | "Strong" => { 137 | strong_matches = matches; 138 | } 139 | "Weak" => { 140 | weak_matches = matches; 141 | } 142 | _ => { 143 | error!("Unknown thesaurus match type: {match_type}"); 144 | } 145 | } 146 | } 147 | 148 | Ok(ThesaurusItem { 149 | part_of_speech, 150 | as_in, 151 | strongest_matches, 152 | strong_matches, 153 | weak_matches, 154 | }) 155 | } 156 | 157 | fn render_thesaurus_html( 158 | ThesaurusResponse { word, items }: ThesaurusResponse, 159 | ) -> PreEscaped { 160 | html! { 161 | h2.answer-thesaurus-word { 162 | a href={ "https://www.thesaurus.com/browse/" (word) } { 163 | (word) 164 | } 165 | } 166 | div.answer-thesaurus-items { 167 | @for item in items { 168 | div.answer-thesaurus-item { 169 | (render_thesaurus_item_html(item)) 170 | } 171 | } 172 | } 173 | 174 | } 175 | } 176 | 177 | fn render_thesaurus_item_html( 178 | ThesaurusItem { 179 | part_of_speech, 180 | as_in, 181 | strongest_matches, 182 | strong_matches, 183 | weak_matches, 184 | }: ThesaurusItem, 185 | ) -> PreEscaped { 186 | let mut html = String::new(); 187 | 188 | html.push_str( 189 | &html! { 190 | span.answer-thesaurus-word-description { 191 | span.answer-thesaurus-part-of-speech { (part_of_speech.to_lowercase()) } 192 | ", as in " 193 | span.answer-thesaurus-as-in { (as_in) } 194 | } 195 | } 196 | .into_string(), 197 | ); 198 | 199 | let render_matches = |matches: Vec, strength: &str| { 200 | if matches.is_empty() { 201 | return PreEscaped::default(); 202 | } 203 | 204 | html! 
{ 205 | div.{ "answer-thesaurus-" (strength.to_lowercase().replace(' ', "-")) } { 206 | h3.answer-thesaurus-category-title { 207 | (strength) 208 | " " 209 | (if matches.len() == 1 { "match" } else { "matches" }) 210 | } 211 | ul.answer-thesaurus-list { 212 | @for synonym in matches { 213 | li { 214 | a href={ "https://www.thesaurus.com/browse/" (synonym) } { (synonym) } 215 | } 216 | } 217 | } 218 | } 219 | } 220 | }; 221 | 222 | html.push_str(&render_matches(strongest_matches, "Strongest").into_string()); 223 | html.push_str(&render_matches(strong_matches, "Strong").into_string()); 224 | html.push_str(&render_matches(weak_matches, "Weak").into_string()); 225 | 226 | PreEscaped(html) 227 | } 228 | -------------------------------------------------------------------------------- /src/urls.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | use tracing::{error, warn}; 4 | use url::Url; 5 | 6 | use crate::config::{HostAndPath, UrlsConfig}; 7 | 8 | #[tracing::instrument] 9 | pub fn normalize_url(url: &str) -> String { 10 | let url = url.trim_end_matches('#'); 11 | if url.is_empty() { 12 | warn!("url is empty"); 13 | return String::new(); 14 | } 15 | 16 | let Ok(mut url) = Url::parse(url) else { 17 | error!("failed to parse url"); 18 | return url.to_string(); 19 | }; 20 | 21 | // make sure the scheme is https 22 | if url.scheme() == "http" { 23 | url.set_scheme("https").unwrap(); 24 | } 25 | 26 | // remove fragment 27 | url.set_fragment(None); 28 | 29 | // remove trailing slash 30 | let path = url.path().to_string(); 31 | if let Some(path) = path.strip_suffix('/') { 32 | url.set_path(path); 33 | } 34 | 35 | // remove tracking params 36 | let query_pairs = url.query_pairs().into_owned(); 37 | let mut new_query_pairs = Vec::new(); 38 | const TRACKING_PARAMS: &[&str] = &["ref_src", "_sm_au_"]; 39 | for (key, value) in query_pairs { 40 | if !TRACKING_PARAMS.contains(&key.as_str()) { 41 | 
new_query_pairs.push((key, value)); 42 | } 43 | } 44 | if new_query_pairs.is_empty() { 45 | url.set_query(None); 46 | } else { 47 | url.set_query(Some( 48 | &url::form_urlencoded::Serializer::new(String::new()) 49 | .extend_pairs(new_query_pairs) 50 | .finish(), 51 | )); 52 | } 53 | 54 | // url decode and encode path 55 | let path = url.path().to_string(); 56 | let path = match urlencoding::decode(&path) { 57 | Ok(path) => path, 58 | Err(e) => { 59 | warn!("failed to decode path: {e}"); 60 | Cow::Owned(path) 61 | } 62 | }; 63 | url.set_path(path.as_ref()); 64 | 65 | let url = url.to_string(); 66 | // remove trailing slash 67 | let url = if let Some(url) = url.strip_suffix('/') { 68 | url.to_string() 69 | } else { 70 | url 71 | }; 72 | 73 | url 74 | } 75 | 76 | impl HostAndPath { 77 | pub fn contains(&self, host: &str, path: &str) -> bool { 78 | if self.host.starts_with('.') { 79 | if !host.ends_with(&self.host) { 80 | return false; 81 | } 82 | } else if host != self.host { 83 | return false; 84 | } 85 | 86 | if self.path.ends_with('/') || self.path.is_empty() { 87 | path.starts_with(&self.path) 88 | } else { 89 | path == self.path 90 | } 91 | } 92 | 93 | pub fn replace( 94 | replace_from: &HostAndPath, 95 | replace_with: &HostAndPath, 96 | real_url: &HostAndPath, 97 | ) -> Option<(String, String)> { 98 | let new_host = if replace_from.host.starts_with(".") { 99 | if replace_with.host.starts_with(".") { 100 | if let Some(host_without_suffix) = real_url.host.strip_suffix(&replace_from.host) { 101 | format!("{host_without_suffix}{}", replace_with.host) 102 | } else { 103 | return None; 104 | } 105 | } else if real_url.host.ends_with(&replace_from.host) { 106 | replace_with.host.to_owned() 107 | } else { 108 | return None; 109 | } 110 | } else if real_url.host == replace_from.host { 111 | replace_with.host.clone() 112 | } else { 113 | return None; 114 | }; 115 | 116 | // host matches, now check path 117 | 118 | let new_path = if replace_from.path.ends_with('/') || 
replace_from.path.is_empty() { 119 | if replace_with.path.ends_with('/') || replace_with.path.is_empty() { 120 | if let Some(path_without_prefix) = real_url.path.strip_prefix(&replace_from.path) { 121 | format!("{}{path_without_prefix}", replace_with.path) 122 | } else { 123 | return None; 124 | } 125 | } else if real_url.path.starts_with(&replace_from.path) { 126 | replace_with.path.clone() 127 | } else { 128 | return None; 129 | } 130 | } else if real_url.path == replace_from.path { 131 | replace_with.path.clone() 132 | } else { 133 | return None; 134 | }; 135 | 136 | Some((new_host, new_path)) 137 | } 138 | } 139 | 140 | pub fn apply_url_replacements(url: &str, urls_config: &UrlsConfig) -> String { 141 | let Ok(mut url) = Url::parse(url) else { 142 | error!("failed to parse url"); 143 | return url.to_string(); 144 | }; 145 | 146 | let host = url.host_str().unwrap_or_default().to_owned(); 147 | 148 | let path = url 149 | .path() 150 | .strip_prefix("/") 151 | .unwrap_or(url.path()) 152 | .to_owned(); 153 | let real_url = HostAndPath { host, path }; 154 | for (replace_from, replace_to) in &urls_config.replace { 155 | if let Some((new_host, new_path)) = 156 | HostAndPath::replace(replace_from, replace_to, &real_url) 157 | { 158 | let _ = url.set_host(Some(&new_host)); 159 | url.set_path(&new_path); 160 | break; 161 | } 162 | } 163 | 164 | normalize_url(url.as_ref()) 165 | } 166 | pub fn get_url_weight(url: &str, urls_config: &UrlsConfig) -> f64 { 167 | let Ok(url) = Url::parse(url) else { 168 | error!("failed to parse url"); 169 | return 1.; 170 | }; 171 | 172 | let host = url.host_str().unwrap_or_default().to_owned(); 173 | let path = url.path().strip_prefix("/").unwrap_or_default().to_owned(); 174 | for (check, weight) in &urls_config.weight { 175 | if check.contains(&host, &path) { 176 | return *weight; 177 | } 178 | } 179 | 180 | 1. 
181 | } 182 | 183 | #[cfg(test)] 184 | mod tests { 185 | use crate::config::HostAndPath; 186 | 187 | use super::*; 188 | 189 | fn test_replacement(from: &str, to: &str, url: &str, expected: &str) { 190 | let urls_config = UrlsConfig { 191 | replace: vec![(HostAndPath::new(from), HostAndPath::new(to))], 192 | weight: vec![], 193 | }; 194 | let normalized_url = apply_url_replacements(url, &urls_config); 195 | assert_eq!(normalized_url, expected); 196 | } 197 | 198 | #[test] 199 | fn test_replace_url() { 200 | test_replacement( 201 | "minecraft.fandom.com/wiki/", 202 | "minecraft.wiki/w/", 203 | "https://minecraft.fandom.com/wiki/Java_Edition", 204 | "https://minecraft.wiki/w/Java_Edition", 205 | ); 206 | } 207 | #[test] 208 | fn test_replace_wildcard_host_with_absolute() { 209 | test_replacement( 210 | ".medium.com", 211 | "scribe.rip", 212 | "https://example.medium.com/asdf", 213 | "https://scribe.rip/asdf", 214 | ); 215 | } 216 | #[test] 217 | fn test_replace_wildcard_host_with_wildcard() { 218 | test_replacement( 219 | ".medium.com", 220 | ".scribe.rip", 221 | "https://example.medium.com/asdf", 222 | "https://example.scribe.rip/asdf", 223 | ); 224 | } 225 | #[test] 226 | fn test_non_matching_wildcard() { 227 | test_replacement( 228 | ".medium.com", 229 | ".scribe.rip", 230 | "https://medium.com/asdf", 231 | "https://medium.com/asdf", 232 | ); 233 | } 234 | #[test] 235 | fn test_non_matching_wildcard_to_absolute() { 236 | test_replacement( 237 | ".medium.com", 238 | "scribe.rip", 239 | "https://example.com/asdf", 240 | "https://example.com/asdf", 241 | ); 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /src/engines/ranking.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, sync::Arc}; 2 | 3 | use crate::{ 4 | config::Config, 5 | urls::{apply_url_replacements, get_url_weight}, 6 | }; 7 | 8 | use super::{ 9 | Answer, AutocompleteResult, Engine, 
EngineImageResult, EngineImagesResponse, EngineResponse, 10 | EngineSearchResult, FeaturedSnippet, ImagesResponse, Infobox, Response, SearchResult, 11 | }; 12 | 13 | pub fn merge_engine_responses( 14 | config: Arc, 15 | responses: HashMap, 16 | ) -> Response { 17 | let mut search_results: Vec> = Vec::new(); 18 | let mut featured_snippet: Option = None; 19 | let mut answer: Option = None; 20 | let mut infobox: Option = None; 21 | 22 | for (engine, response) in responses { 23 | let engine_config = config.engines.get(engine); 24 | 25 | for (result_index, mut search_result) in response.search_results.into_iter().enumerate() { 26 | // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a 27 | // score of 0.33, etc. 28 | let base_result_score = 1. / (result_index + 1) as f64; 29 | let result_score = base_result_score * engine_config.weight; 30 | 31 | // apply url config here 32 | search_result.url = apply_url_replacements(&search_result.url, &config.urls); 33 | let url_weight = get_url_weight(&search_result.url, &config.urls); 34 | if url_weight <= 0. { 35 | continue; 36 | } 37 | let result_score = result_score * url_weight; 38 | 39 | if let Some(existing_result) = search_results 40 | .iter_mut() 41 | .find(|r| r.result.url == search_result.url) 42 | { 43 | // if the weight of this engine is higher than every other one then replace the 44 | // title and description 45 | if engine_config.weight 46 | > existing_result 47 | .engines 48 | .iter() 49 | .map(|&other_engine| { 50 | let other_engine_config = config.engines.get(other_engine); 51 | other_engine_config.weight 52 | }) 53 | .max_by(|a, b| a.partial_cmp(b).unwrap()) 54 | .unwrap_or(0.) 
55 | { 56 | existing_result.result.title = search_result.title; 57 | existing_result.result.description = search_result.description; 58 | } 59 | 60 | existing_result.engines.insert(engine); 61 | existing_result.score += result_score; 62 | } else { 63 | search_results.push(SearchResult { 64 | result: search_result, 65 | engines: [engine].iter().copied().collect(), 66 | score: result_score, 67 | }); 68 | } 69 | } 70 | 71 | if let Some(mut engine_featured_snippet) = response.featured_snippet { 72 | // if it has a higher weight than the current featured snippet 73 | let featured_snippet_weight = featured_snippet.as_ref().map_or(0., |s| { 74 | let other_engine_config = config.engines.get(s.engine); 75 | other_engine_config.weight 76 | }); 77 | 78 | // url config applies to featured snippets too 79 | engine_featured_snippet.url = 80 | apply_url_replacements(&engine_featured_snippet.url, &config.urls); 81 | let url_weight = get_url_weight(&engine_featured_snippet.url, &config.urls); 82 | if url_weight <= 0. 
{ 83 | continue; 84 | } 85 | let featured_snippet_weight = featured_snippet_weight * url_weight; 86 | 87 | if engine_config.weight > featured_snippet_weight { 88 | featured_snippet = Some(FeaturedSnippet { 89 | url: engine_featured_snippet.url, 90 | title: engine_featured_snippet.title, 91 | description: engine_featured_snippet.description, 92 | engine, 93 | }); 94 | } 95 | } 96 | 97 | if let Some(engine_answer_html) = response.answer_html { 98 | // if it has a higher weight than the current answer 99 | let answer_weight = answer.as_ref().map_or(0., |s| { 100 | let other_engine_config = config.engines.get(s.engine); 101 | other_engine_config.weight 102 | }); 103 | if engine_config.weight > answer_weight { 104 | answer = Some(Answer { 105 | html: engine_answer_html, 106 | engine, 107 | }); 108 | } 109 | } 110 | 111 | if let Some(engine_infobox_html) = response.infobox_html { 112 | // if it has a higher weight than the current infobox 113 | let infobox_weight = infobox.as_ref().map_or(0., |s| { 114 | let other_engine_config = config.engines.get(s.engine); 115 | other_engine_config.weight 116 | }); 117 | if engine_config.weight > infobox_weight { 118 | infobox = Some(Infobox { 119 | html: engine_infobox_html, 120 | engine, 121 | }); 122 | } 123 | } 124 | } 125 | 126 | search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); 127 | 128 | Response { 129 | search_results, 130 | featured_snippet, 131 | answer, 132 | infobox, 133 | config, 134 | } 135 | } 136 | 137 | pub fn merge_autocomplete_responses( 138 | config: &Config, 139 | responses: HashMap>, 140 | ) -> Vec { 141 | let mut autocomplete_results: Vec = Vec::new(); 142 | 143 | for (engine, response) in responses { 144 | let engine_config = config.engines.get(engine); 145 | 146 | for (result_index, autocomplete_result) in response.into_iter().enumerate() { 147 | // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a 148 | // score of 0.33, etc. 149 | let base_result_score = 1. 
/ (result_index + 1) as f64; 150 | let result_score = base_result_score * engine_config.weight; 151 | 152 | if let Some(existing_result) = autocomplete_results 153 | .iter_mut() 154 | .find(|r| r.query == autocomplete_result) 155 | { 156 | existing_result.score += result_score; 157 | } else { 158 | autocomplete_results.push(AutocompleteResult { 159 | query: autocomplete_result, 160 | score: result_score, 161 | }); 162 | } 163 | } 164 | } 165 | 166 | autocomplete_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); 167 | 168 | autocomplete_results.into_iter().map(|r| r.query).collect() 169 | } 170 | 171 | pub fn merge_images_responses( 172 | config: Arc, 173 | responses: HashMap, 174 | ) -> ImagesResponse { 175 | let mut image_results: Vec> = Vec::new(); 176 | 177 | for (engine, response) in responses { 178 | let engine_config = config.engines.get(engine); 179 | 180 | for (result_index, image_result) in response.image_results.into_iter().enumerate() { 181 | // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a 182 | // score of 0.33, etc. 183 | let base_result_score = 1. / (result_index + 1) as f64; 184 | let result_score = base_result_score * engine_config.weight; 185 | 186 | if let Some(existing_result) = image_results 187 | .iter_mut() 188 | .find(|r| r.result.image_url == image_result.image_url) 189 | { 190 | // if the weight of this engine is higher than every other one then replace the 191 | // title and page url 192 | if engine_config.weight 193 | > existing_result 194 | .engines 195 | .iter() 196 | .map(|&other_engine| { 197 | let other_engine_config = config.engines.get(other_engine); 198 | other_engine_config.weight 199 | }) 200 | .max_by(|a, b| a.partial_cmp(b).unwrap()) 201 | .unwrap_or(0.) 
202 | { 203 | existing_result.result.title = image_result.title; 204 | existing_result.result.page_url = image_result.page_url; 205 | } 206 | 207 | existing_result.engines.insert(engine); 208 | existing_result.score += result_score; 209 | } else { 210 | image_results.push(SearchResult { 211 | result: image_result, 212 | engines: [engine].iter().copied().collect(), 213 | score: result_score, 214 | }); 215 | } 216 | } 217 | } 218 | 219 | image_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); 220 | 221 | ImagesResponse { 222 | image_results, 223 | config, 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /src/engines/search/google.rs: -------------------------------------------------------------------------------- 1 | use eyre::eyre; 2 | use scraper::{ElementRef, Selector}; 3 | use tracing::warn; 4 | use url::Url; 5 | 6 | use crate::{ 7 | engines::{ 8 | EngineImageResult, EngineImagesResponse, EngineResponse, RequestResponse, SearchQuery, 9 | CLIENT, 10 | }, 11 | parse::{parse_html_response_with_opts, ParseOpts, QueryMethod}, 12 | }; 13 | 14 | pub async fn request(search: &SearchQuery) -> eyre::Result { 15 | let url = Url::parse_with_params( 16 | "https://www.google.com/search", 17 | &[ 18 | ("q", search.query.as_str()), 19 | // nfpr makes it not try to autocorrect 20 | ("nfpr", "1"), 21 | ("filter", "0"), 22 | ("start", "0"), 23 | ], 24 | ) 25 | .unwrap(); 26 | 27 | Ok(CLIENT.get(url).into()) 28 | } 29 | 30 | pub fn parse_response(body: &str) -> eyre::Result { 31 | parse_html_response_with_opts( 32 | body, 33 | ParseOpts::new() 34 | // xpd is weird, some results have it but it's usually used for ads? 
35 | // the :first-child filters out the ads though since for ads the first child is always a 36 | // span 37 | .result("[jscontroller=SC7lYd]") 38 | .title("h3") 39 | .href("a[href]") 40 | .description( 41 | "div[data-sncf='2'], div[data-sncf='1,2'], div[style='-webkit-line-clamp:2']", 42 | ) 43 | .featured_snippet("block-component") 44 | .featured_snippet_description(QueryMethod::Manual(Box::new(|el: &ElementRef| { 45 | let mut description = String::new(); 46 | 47 | // role="heading" 48 | if let Some(heading_el) = el 49 | .select(&Selector::parse("div[role='heading']").unwrap()) 50 | .next() 51 | { 52 | description.push_str(&format!("{}\n\n", heading_el.text().collect::())); 53 | } 54 | 55 | if let Some(description_container_el) = el 56 | .select(&Selector::parse("div[data-attrid='wa:/description'] > span:first-child").unwrap()) 57 | .next() 58 | { 59 | description.push_str(&iter_featured_snippet_children(&description_container_el)); 60 | } 61 | else if let Some(description_list_el) = el 62 | .select(&Selector::parse("ul").unwrap()) 63 | .next() 64 | { 65 | // render as bullet points 66 | for li in description_list_el.select(&Selector::parse("li").unwrap()) { 67 | let text = li.text().collect::(); 68 | description.push_str(&format!("• {text}\n")); 69 | } 70 | } 71 | 72 | Ok(description) 73 | }))) 74 | .featured_snippet_title(".g > div[lang] a h3, div[lang] > div[style='position:relative'] a h3") 75 | .featured_snippet_href(QueryMethod::Manual(Box::new(|el: &ElementRef| { 76 | let url = el 77 | .select(&Selector::parse(".g > div[lang] a:has(h3), div[lang] > div[style='position:relative'] a:has(h3)").unwrap()) 78 | .next() 79 | .and_then(|n| n.value().attr("href")) 80 | .unwrap_or_default(); 81 | clean_url(url) 82 | }))), 83 | ) 84 | } 85 | 86 | // Google autocomplete responses sometimes include clickable links that include 87 | // text that we shouldn't show. 
88 | // We can filter for these by removing any elements matching 89 | // [data-ved]:not([data-send-open-event]) 90 | fn iter_featured_snippet_children(el: &ElementRef) -> String { 91 | let mut description = String::new(); 92 | recursive_iter_featured_snippet_children(&mut description, el); 93 | description 94 | } 95 | fn recursive_iter_featured_snippet_children(description: &mut String, el: &ElementRef) { 96 | for inner_node in el.children() { 97 | match inner_node.value() { 98 | scraper::Node::Text(t) => { 99 | description.push_str(&t.text); 100 | } 101 | scraper::Node::Element(inner_el) => { 102 | if inner_el.attr("data-ved").is_none() 103 | || inner_el.attr("data-send-open-event").is_some() 104 | { 105 | recursive_iter_featured_snippet_children( 106 | description, 107 | &ElementRef::wrap(inner_node).unwrap(), 108 | ); 109 | } 110 | } 111 | _ => {} 112 | } 113 | } 114 | } 115 | 116 | pub fn request_autocomplete(query: &str) -> wreq::RequestBuilder { 117 | CLIENT.get( 118 | Url::parse_with_params( 119 | "https://suggestqueries.google.com/complete/search", 120 | &[ 121 | ("output", "firefox"), 122 | ("client", "firefox"), 123 | ("hl", "US-en"), 124 | ("q", query), 125 | ], 126 | ) 127 | .unwrap(), 128 | ) 129 | } 130 | 131 | pub fn parse_autocomplete_response(body: &str) -> eyre::Result> { 132 | let res = serde_json::from_str::>(body)?; 133 | Ok(res 134 | .into_iter() 135 | .nth(1) 136 | .unwrap_or_default() 137 | .as_array() 138 | .cloned() 139 | .unwrap_or_default() 140 | .into_iter() 141 | .map(|v| v.as_str().unwrap_or_default().to_string()) 142 | .collect()) 143 | } 144 | 145 | pub fn request_images(query: &str) -> wreq::RequestBuilder { 146 | // ok so google also has a json api for images BUT it gives us less results 147 | CLIENT.get( 148 | Url::parse_with_params( 149 | "https://www.google.com/search", 150 | &[("q", query), ("udm", "2"), ("prmd", "ivsnmbtz")], 151 | ) 152 | .unwrap(), 153 | ) 154 | } 155 | 156 | pub fn parse_images_response(body: &str) -> 
eyre::Result { 157 | // we can't just scrape the html because it won't give us the image sources, 158 | // so... we have to scrape their internal json 159 | 160 | // iterate through every script until we find something that matches our regex 161 | let internal_json_regex = 162 | regex::Regex::new(r#"(?:\(function\(\)\{google\.jl=\{.+?)var \w=(\{".+?\});"#)?; 163 | let mut internal_json = None; 164 | let dom = scraper::Html::parse_document(body); 165 | for script in dom.select(&Selector::parse("script").unwrap()) { 166 | let script = script.inner_html(); 167 | if let Some(captures) = internal_json_regex.captures(&script).and_then(|c| c.get(1)) { 168 | internal_json = Some(captures.as_str().to_string()); 169 | break; 170 | } 171 | } 172 | 173 | let internal_json = 174 | internal_json.ok_or_else(|| eyre!("couldn't get internal json for google images"))?; 175 | let internal_json: serde_json::Map = 176 | serde_json::from_str(&internal_json)?; 177 | 178 | let mut image_results = Vec::new(); 179 | for element_json in internal_json.values() { 180 | // the internal json uses arrays instead of maps, which makes it kinda hard to 181 | // use and also probably pretty unstable 182 | 183 | let Some(element_json) = element_json 184 | .as_array() 185 | .and_then(|a| a.get(1)) 186 | .and_then(|v| v.as_array()) 187 | else { 188 | continue; 189 | }; 190 | 191 | let Some((image_url, width, height)) = element_json 192 | .get(3) 193 | .and_then(|v| serde_json::from_value(v.clone()).ok()) 194 | else { 195 | warn!("couldn't get image data from google images json"); 196 | continue; 197 | }; 198 | 199 | // this is probably pretty brittle, hopefully google doesn't break it any time 200 | // soon 201 | let Some(page) = element_json 202 | .get(9) 203 | .and_then(|v| v.as_object()) 204 | .and_then(|o| o.get("2003")) 205 | .and_then(|v| v.as_array()) 206 | else { 207 | warn!("couldn't get page data from google images json"); 208 | continue; 209 | }; 210 | let Some(page_url) = 
page.get(2).and_then(|v| v.as_str()).map(|s| s.to_string()) else { 211 | warn!("couldn't get page url from google images json"); 212 | continue; 213 | }; 214 | let Some(title) = page.get(3).and_then(|v| v.as_str()).map(|s| s.to_string()) else { 215 | warn!("couldn't get page title from google images json"); 216 | continue; 217 | }; 218 | 219 | image_results.push(EngineImageResult { 220 | image_url, 221 | page_url, 222 | title, 223 | width, 224 | height, 225 | }); 226 | } 227 | 228 | Ok(EngineImagesResponse { image_results }) 229 | } 230 | 231 | fn clean_url(url: &str) -> eyre::Result { 232 | if url.starts_with("/url?q=") { 233 | // get the q param 234 | let url = Url::parse(format!("https://www.google.com{url}").as_str())?; 235 | let q = url 236 | .query_pairs() 237 | .find(|(key, _)| key == "q") 238 | .unwrap_or_default() 239 | .1; 240 | Ok(q.to_string()) 241 | } else { 242 | Ok(url.to_string()) 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /src/web/search.rs: -------------------------------------------------------------------------------- 1 | mod all; 2 | mod images; 3 | 4 | use std::{collections::HashMap, net::SocketAddr, str::FromStr}; 5 | 6 | use async_stream::stream; 7 | use axum::{ 8 | body::Body, 9 | extract::{ConnectInfo, Query}, 10 | http::{header, HeaderMap, StatusCode}, 11 | response::IntoResponse, 12 | Extension, Json, 13 | }; 14 | use bytes::Bytes; 15 | use maud::{html, PreEscaped, DOCTYPE}; 16 | 17 | use crate::{ 18 | config::Config, 19 | engines::{ 20 | self, Engine, EngineProgressUpdate, ProgressUpdateData, ResponseForTab, SearchQuery, 21 | SearchTab, 22 | }, 23 | web::head_html, 24 | }; 25 | 26 | fn render_beginning_of_html(search: &SearchQuery) -> String { 27 | let form_html = html! 
{ 28 | form.search-form action="/search" method="get" { 29 | input #search-input type="text" name="q" placeholder="Search" value=(search.query) autofocus onfocus="this.select()" autocomplete="off"; 30 | @if search.tab != SearchTab::default() { 31 | input type="hidden" name="tab" value=(search.tab.to_string()); 32 | } 33 | input type="submit" value="Search"; 34 | } 35 | @if search.config.image_search.enabled { 36 | div.search-tabs { 37 | @if search.tab == SearchTab::All { span.search-tab.selected { "All" } } 38 | @else { a.search-tab href={ "?q=" (search.query) } { "All" } } 39 | @if search.tab == SearchTab::Images { span.search-tab.selected { "Images" } } 40 | @else { a.search-tab href={ "?q=" (search.query) "&tab=images" } { "Images" } } 41 | } 42 | } 43 | }; 44 | 45 | // we don't close the elements here because we do chunked responses 46 | html! { 47 | (DOCTYPE) 48 | html lang="en"; 49 | {(head_html(Some(&search.query), &search.config))} 50 | body; 51 | div.main-container.{"search-" (search.tab.to_string())}; 52 | main; 53 | (form_html) 54 | div.progress-updates; 55 | } 56 | .into_string() 57 | } 58 | 59 | fn render_end_of_html() -> String { 60 | r"
".to_string() 61 | } 62 | 63 | fn render_results_for_tab(response: ResponseForTab) -> PreEscaped { 64 | match response { 65 | ResponseForTab::All(r) => all::render_results(r), 66 | ResponseForTab::Images(r) => images::render_results(r), 67 | } 68 | } 69 | 70 | fn render_engine_progress_update( 71 | engine: Engine, 72 | progress_update: &EngineProgressUpdate, 73 | time_ms: u64, 74 | ) -> String { 75 | let message = match progress_update { 76 | EngineProgressUpdate::Requesting => "requesting".to_string(), 77 | EngineProgressUpdate::Downloading => "downloading".to_string(), 78 | EngineProgressUpdate::Parsing => "parsing".to_string(), 79 | EngineProgressUpdate::Done => html! { span.progress-update-done { "done" } }.into_string(), 80 | EngineProgressUpdate::Error(msg) => { 81 | html! { span.progress-update-error { (msg) } }.into_string() 82 | } 83 | }; 84 | 85 | html! { 86 | span.progress-update-time { 87 | (format!("{time_ms:>4}")) 88 | "ms" 89 | } 90 | " " 91 | (engine) 92 | " " 93 | (PreEscaped(message)) 94 | } 95 | .into_string() 96 | } 97 | 98 | pub fn render_engine_list(engines: &[engines::Engine], config: &Config) -> PreEscaped { 99 | let mut html = String::new(); 100 | for (i, engine) in engines.iter().enumerate() { 101 | if config.ui.show_engine_list_separator && i > 0 { 102 | html.push_str(" · "); 103 | } 104 | let raw_engine_id = &engine.id(); 105 | let engine_id = if config.ui.show_engine_list_separator { 106 | raw_engine_id.replace('_', " ") 107 | } else { 108 | raw_engine_id.to_string() 109 | }; 110 | html.push_str(&html! { span.engine-list-item { (engine_id) } }.into_string()) 111 | } 112 | html! 
{ 113 | div.engine-list { 114 | (PreEscaped(html)) 115 | } 116 | } 117 | } 118 | 119 | pub async fn get( 120 | Query(params): Query>, 121 | Extension(config): Extension, 122 | headers: HeaderMap, 123 | ConnectInfo(addr): ConnectInfo, 124 | ) -> axum::response::Response { 125 | let query = params 126 | .get("q") 127 | .cloned() 128 | .unwrap_or_default() 129 | .trim() 130 | .replace('\n', " "); 131 | if query.is_empty() { 132 | // redirect to index 133 | return ( 134 | StatusCode::FOUND, 135 | [ 136 | (header::LOCATION, "/"), 137 | (header::CONTENT_TYPE, "text/html; charset=utf-8"), 138 | ], 139 | Body::from("No query provided, click here to go back to index"), 140 | ) 141 | .into_response(); 142 | } 143 | 144 | let search_tab = params 145 | .get("tab") 146 | .and_then(|t| SearchTab::from_str(t).ok()) 147 | .unwrap_or_default(); 148 | 149 | let query = SearchQuery { 150 | query, 151 | tab: search_tab, 152 | request_headers: headers 153 | .clone() 154 | .into_iter() 155 | .map(|(k, v)| { 156 | ( 157 | k.map(|k| k.to_string()).unwrap_or_default(), 158 | v.to_str().unwrap_or_default().to_string(), 159 | ) 160 | }) 161 | .collect(), 162 | ip: headers 163 | // this could be exploited under some setups, but the ip is only used for the 164 | // "what is my ip" answer so it doesn't really matter 165 | .get("x-forwarded-for") 166 | .map_or_else( 167 | || addr.ip().to_string(), 168 | |ip| ip.to_str().unwrap_or_default().to_string(), 169 | ), 170 | config: config.clone().into(), 171 | }; 172 | 173 | let trying_to_use_api = 174 | query.request_headers.get("accept") == Some(&"application/json".to_string()); 175 | if trying_to_use_api { 176 | if !config.api { 177 | return (StatusCode::FORBIDDEN, "API access is disabled").into_response(); 178 | } 179 | 180 | let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel(); 181 | let search_future = tokio::spawn(async move { engines::search(&query, progress_tx).await }); 182 | if let Err(e) = search_future.await { 183 | 
return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(); 184 | } 185 | 186 | let mut results = Vec::new(); 187 | while let Some(progress_update) = progress_rx.recv().await { 188 | if let ProgressUpdateData::Response(r) = progress_update.data { 189 | results.push(r); 190 | } 191 | } 192 | 193 | return Json(results).into_response(); 194 | } 195 | 196 | let s = stream! { 197 | type R = Result; 198 | 199 | // the html is sent in three chunks (technically more if you count progress updates): 200 | // 1) the beginning of the html, including the search bar 201 | // 1.5) the progress updates 202 | // 2) the results 203 | // 3) the post-search infobox (usually not sent) + the end of the html 204 | 205 | let first_part = render_beginning_of_html(&query); 206 | // second part is in the loop 207 | let mut third_part = String::new(); 208 | 209 | yield R::Ok(Bytes::from(first_part)); 210 | 211 | let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel(); 212 | 213 | let search_future = tokio::spawn(async move { engines::search(&query, progress_tx).await }); 214 | 215 | while let Some(progress_update) = progress_rx.recv().await { 216 | match progress_update.data { 217 | ProgressUpdateData::Engine { engine, update } => { 218 | let progress_html = format!( 219 | r#"

{}

"#, 220 | render_engine_progress_update(engine, &update, progress_update.time_ms) 221 | ); 222 | yield R::Ok(Bytes::from(progress_html)); 223 | }, 224 | ProgressUpdateData::Response(results) => { 225 | let mut second_part = String::new(); 226 | 227 | second_part.push_str(""); // close progress-updates 228 | #[allow(clippy::literal_string_with_formatting_args)] 229 | second_part.push_str(""); 230 | second_part.push_str(&render_results_for_tab(results).into_string()); 231 | yield Ok(Bytes::from(second_part)); 232 | }, 233 | ProgressUpdateData::PostSearchInfobox(infobox) => { 234 | third_part.push_str(&all::render_infobox(&infobox, &config).into_string()); 235 | } 236 | } 237 | } 238 | 239 | if let Err(e) = search_future.await? { 240 | let error_html = html! { 241 | h1 { 242 | "Error: " 243 | (e) 244 | } 245 | }.into_string(); 246 | yield R::Ok(Bytes::from(error_html)); 247 | return; 248 | }; 249 | 250 | third_part.push_str(&render_end_of_html()); 251 | 252 | yield Ok(Bytes::from(third_part)); 253 | 254 | }; 255 | 256 | let stream = Body::from_stream(s); 257 | 258 | ( 259 | [ 260 | (header::CONTENT_TYPE, "text/html; charset=utf-8"), 261 | (header::TRANSFER_ENCODING, "chunked"), 262 | ], 263 | stream, 264 | ) 265 | .into_response() 266 | } 267 | -------------------------------------------------------------------------------- /src/web/assets/scripts/colorpicker.js: -------------------------------------------------------------------------------- 1 | // some guy on stackoverflow wrote a bunch of codegolfed color space conversion functions so i 2 | // stole them for this (except the cmyk functions, those were stolen from other places) 3 | 4 | // https://stackoverflow.com/a/54116681 5 | function hsvToHsl(h, s, v) { 6 | const l = v - (v * s) / 2; 7 | const m = Math.min(l, 1 - l); 8 | return [h, m ? (v - l) / m : 0, l]; 9 | } 10 | function hslToHsv(h, s, l) { 11 | let v = s * Math.min(l, 1 - l) + l; 12 | return [h, v ? 
2 - (2 * l) / v : 0, v];
}

// https://stackoverflow.com/a/54024653
// h in [0, 360), s and v in [0, 1]; returns [r, g, b] each in [0, 1]
function hsvToRgb(h, s, v) {
  let f = (n, k = (n + h / 60) % 6) =>
    v - v * s * Math.max(Math.min(k, 4 - k, 1), 0);
  return [f(5), f(3), f(1)];
}
// https://stackoverflow.com/a/54070620
function rgbToHsv(r, g, b) {
  let v = Math.max(r, g, b),
    c = v - Math.min(r, g, b);
  let h =
    c && (v == r ? (g - b) / c : v == g ? 2 + (b - r) / c : 4 + (r - g) / c);
  return [60 * (h < 0 ? h + 6 : h), v && c / v, v];
}
// https://stackoverflow.com/a/54071699
function rgbToHsl(r, g, b) {
  let v = Math.max(r, g, b),
    c = v - Math.min(r, g, b),
    f = 1 - Math.abs(v + v - c - 1);
  let h =
    c && (v == r ? (g - b) / c : v == g ? 2 + (b - r) / c : 4 + (r - g) / c);
  return [60 * (h < 0 ? h + 6 : h), f ? c / f : 0, (v + v - c) / 2];
}

// https://www.codeproject.com/Articles/4488/XCmyk-CMYK-to-RGB-Calculator-with-source-code
// all channels in [0, 1]; pure black is special-cased to avoid dividing 0/0
function rgbToCmyk(r, g, b) {
  const k = 1 - Math.max(r, g, b);
  if (k === 1) return [0, 0, 0, 1];
  const c = (1 - r - k) / (1 - k);
  const m = (1 - g - k) / (1 - k);
  const y = (1 - b - k) / (1 - k);
  return [c, m, y, k];
}
// https://stackoverflow.com/a/37643472
function cmykToRgb(c, m, y, k) {
  const r = (1 - c) * (1 - k);
  const g = (1 - m) * (1 - k);
  const b = (1 - y) * (1 - k);
  return [r, g, b];
}

// used for making it so an input isn't modified if we just typed in it
let activeInput = null;
document.addEventListener("keydown", () => {
  activeInput = document.activeElement;
});
document.addEventListener("focusout", () => {
  activeInput = null;

  // in case they set an input to an invalid value
  updateColorPreview();
});

const colorPickerEl = document.getElementsByClassName("answer-colorpicker")[0];

// dom references: saturation/value canvas, pickers, hue slider, preview swatch
const canvasEl = colorPickerEl.getElementsByClassName(
  "answer-colorpicker-canvas"
)[0];
const canvasHueSvgEl = canvasEl.getElementsByClassName(
  "answer-colorpicker-canvas-hue-svg"
)[0];
const pickerEl = colorPickerEl.getElementsByClassName(
  "answer-colorpicker-picker"
)[0];
const previewEl = colorPickerEl.getElementsByClassName(
  "answer-colorpicker-preview"
)[0];
const sliderEl = colorPickerEl.getElementsByClassName(
  "answer-colorpicker-slider"
)[0];
const huepickerEl = colorPickerEl.getElementsByClassName(
  "answer-colorpicker-huepicker"
)[0];

const hexInputEl = document.getElementById("answer-colorpicker-hex-input");
const rgbInputEl = document.getElementById("answer-colorpicker-rgb-input");
const cmykInputEl = document.getElementById("answer-colorpicker-cmyk-input");
const hsvInputEl = document.getElementById("answer-colorpicker-hsv-input");
const hslInputEl = document.getElementById("answer-colorpicker-hsl-input");

// current color, kept in sync across all four representations
// NOTE(review): parseHsv/parseHsl/parseRgb/parseCmyk are not defined in this
// part of the file — presumably declared further down; verify
let hsv = parseHsv(hsvInputEl.value);
let hsl = parseHsl(hslInputEl.value);
let rgb = parseRgb(rgbInputEl.value);
let cmyk = parseCmyk(cmykInputEl.value);

function clamp(n, min, max) {
  return Math.max(min, Math.min(max, n));
}

// setters: clamp, store one representation, derive the others, then redraw
function setHsv(h, s, v) {
  h = clamp(h, 0, 360);
  s = clamp(s, 0, 1);
  v = clamp(v, 0, 1);

  hsv = [h, s, v];
  hsl = hsvToHsl(...hsv);
  rgb = hsvToRgb(...hsv);
  cmyk = rgbToCmyk(...rgb);
  updateColorPreview();
}
function setHsl(h, s, l) {
  h = clamp(h, 0, 360);
  s = clamp(s, 0, 1);
  l = clamp(l, 0, 1);

  hsl = [h, s, l];
  hsv = hslToHsv(...hsl);
  rgb = hsvToRgb(...hsv);
  cmyk = rgbToCmyk(...rgb);
  updateColorPreview();
}
function setRgb(r, g, b) {
  r = clamp(r, 0, 1);
  g = clamp(g, 0, 1);
  b = clamp(b, 0, 1);

  rgb = [r, g, b];
hsl = rgbToHsl(...rgb); 133 | hsv = hslToHsv(...hsl); 134 | cmyk = rgbToCmyk(...rgb); 135 | updateColorPreview(); 136 | } 137 | function setCmyk(c, m, y, k) { 138 | c = clamp(c, 0, 1); 139 | m = clamp(m, 0, 1); 140 | y = clamp(y, 0, 1); 141 | k = clamp(k, 0, 1); 142 | 143 | cmyk = [c, m, y, k]; 144 | rgb = cmykToRgb(...cmyk); 145 | hsl = rgbToHsl(...rgb); 146 | hsv = rgbToHsv(...rgb); 147 | updateColorPreview(); 148 | } 149 | 150 | let mouseInCanvas = false; 151 | function canvasMouseDown(clientX, clientY) { 152 | activeInput = null; 153 | updatePicker(clientX, clientY); 154 | mouseInCanvas = true; 155 | } 156 | function canvasMouseMove(clientX, clientY) { 157 | activeInput; 158 | if (mouseInCanvas) updatePicker(clientX, clientY); 159 | } 160 | function canvasMouseUp() { 161 | mouseInCanvas = false; 162 | } 163 | canvasEl.addEventListener("mousedown", (e) => { 164 | canvasMouseDown(e.clientX, e.clientY); 165 | }); 166 | canvasEl.addEventListener("touchstart", (e) => { 167 | canvasMouseDown(e.touches[0].clientX, e.touches[0].clientY); 168 | }); 169 | document.addEventListener("mouseup", () => { 170 | canvasMouseUp(); 171 | }); 172 | document.addEventListener("touchend", () => { 173 | canvasMouseUp(); 174 | }); 175 | document.addEventListener("mousemove", (e) => { 176 | canvasMouseMove(e.clientX, e.clientY); 177 | }); 178 | document.addEventListener("touchmove", (e) => { 179 | canvasMouseMove(e.touches[0].clientX, e.touches[0].clientY); 180 | }); 181 | 182 | let mouseInSlider = false; 183 | function sliderMouseDown(clientX) { 184 | updateHuePicker(clientX); 185 | mouseInSlider = true; 186 | } 187 | function sliderMouseMove(clientX) { 188 | if (mouseInSlider) updateHuePicker(clientX); 189 | } 190 | function sliderMouseUp() { 191 | mouseInSlider = false; 192 | } 193 | sliderEl.addEventListener("mousedown", (e) => { 194 | sliderMouseDown(e.clientX); 195 | }); 196 | sliderEl.addEventListener("touchstart", (e) => { 197 | sliderMouseDown(e.touches[0].clientX); 198 | }); 
199 | huepickerEl.addEventListener("mousedown", (e) => { 200 | sliderMouseDown(e.clientX); 201 | }); 202 | huepickerEl.addEventListener("touchstart", (e) => { 203 | sliderMouseDown(e.touches[0].clientX); 204 | }); 205 | document.addEventListener("mouseup", () => { 206 | sliderMouseUp(); 207 | }); 208 | document.addEventListener("touchend", () => { 209 | sliderMouseUp(); 210 | }); 211 | document.addEventListener("mousemove", (e) => { 212 | sliderMouseMove(e.clientX); 213 | }); 214 | document.addEventListener("touchmove", (e) => { 215 | sliderMouseMove(e.touches[0].clientX); 216 | }); 217 | 218 | function updatePicker(clientX, clientY) { 219 | const rect = canvasEl.getBoundingClientRect(); 220 | let x = clientX - rect.left; 221 | let y = clientY - rect.top; 222 | if (x < 0) x = 0; 223 | if (y < 0) y = 0; 224 | if (x > rect.width) x = rect.width; 225 | if (y > rect.height) y = rect.height; 226 | 227 | pickerEl.style.left = `${(x / rect.width) * 100}%`; 228 | pickerEl.style.top = `${(y / rect.height) * 100}%`; 229 | 230 | const hue = hsv[0]; 231 | setHsv(hue, x / rect.width, 1 - y / rect.height); 232 | } 233 | 234 | function updateHuePicker(clientX) { 235 | const rect = sliderEl.getBoundingClientRect(); 236 | let x = clientX - rect.left; 237 | if (x < 0) x = 0; 238 | if (x > rect.width) x = rect.width; 239 | 240 | huepickerEl.style.left = `${(x / rect.width) * 100}%`; 241 | 242 | const hue = (x / rect.width) * 360; 243 | setHsv(hue, hsv[1], hsv[2]); 244 | } 245 | 246 | function updateColorPreview() { 247 | const [r, g, b] = rgb; 248 | const [hue, saturation, value] = hsv; 249 | 250 | const color = `rgb(${r * 255}, ${g * 255}, ${b * 255})`; 251 | pickerEl.style.backgroundColor = color; 252 | previewEl.style.backgroundColor = color; 253 | 254 | const hueColor = `hsl(${hue}, 100%, 50%)`; 255 | huepickerEl.style.backgroundColor = hueColor; 256 | canvasHueSvgEl.style.setProperty("stop-color", hueColor); 257 | 258 | pickerEl.style.left = `${saturation * 100}%`; 259 | 
pickerEl.style.top = `${(1 - value) * 100}%`; 260 | 261 | if (activeInput !== hexInputEl) { 262 | hexInputEl.value = 263 | "#" + 264 | rgb 265 | .map((c) => 266 | Math.round(c * 255) 267 | .toString(16) 268 | .padStart(2, "0") 269 | ) 270 | .join(""); 271 | } 272 | if (activeInput !== rgbInputEl) { 273 | rgbInputEl.value = rgb.map((c) => Math.round(c * 255)).join(", "); 274 | } 275 | if (activeInput !== cmykInputEl) { 276 | const cmykPercent = cmyk.map((c) => Math.round(c * 100)); 277 | cmykInputEl.value = `${cmykPercent[0]}%, ${cmykPercent[1]}%, ${cmykPercent[2]}%, ${cmykPercent[3]}%`; 278 | } 279 | if (activeInput !== hsvInputEl) { 280 | const hAngle = Math.round(hsv[0]); 281 | hsvInputEl.value = `${hAngle}°, ${Math.round(hsv[1] * 100)}%, ${Math.round( 282 | hsv[2] * 100 283 | )}%`; 284 | } 285 | if (activeInput !== hslInputEl) { 286 | hslInputEl.value = `${Math.round(hsl[0])}°, ${Math.round( 287 | hsl[1] * 100 288 | )}%, ${Math.round(hsl[2] * 100)}%`; 289 | } 290 | } 291 | 292 | function parseHex(value) { 293 | value = hexInputEl.value.replace("#", ""); 294 | if (value.length === 6) { 295 | const r = parseInt(value.slice(0, 2), 16) / 255; 296 | const g = parseInt(value.slice(2, 4), 16) / 255; 297 | const b = parseInt(value.slice(4, 6), 16) / 255; 298 | return [r, g, b]; 299 | } else if (value.length === 3) { 300 | const r = parseInt(value[0] + value[0], 16) / 255; 301 | const g = parseInt(value[1] + value[1], 16) / 255; 302 | const b = parseInt(value[2] + value[2], 16) / 255; 303 | return [r, g, b]; 304 | } 305 | } 306 | function setFromHexInput() { 307 | setRgb(...parseHex(hexInputEl.value)); 308 | } 309 | hexInputEl.addEventListener("input", setFromHexInput); 310 | 311 | function parseRgb(value) { 312 | return value.split(",").map((c) => parseInt(c) / 255); 313 | } 314 | function setFromRgbInput() { 315 | setRgb(...parseRgb(rgbInputEl.value)); 316 | } 317 | rgbInputEl.addEventListener("input", setFromRgbInput); 318 | 319 | function parseCmyk(value) { 320 | 
return value.split(",").map((c) => parseInt(c) / 100); 321 | } 322 | function setFromCmykInput() { 323 | setCmyk(...parseCmyk(cmykInputEl.value)); 324 | } 325 | cmykInputEl.addEventListener("input", setFromCmykInput); 326 | 327 | function parseHsv(value) { 328 | value = hsvInputEl.value.split(",").map((c) => parseInt(c)); 329 | value[1] /= 100; 330 | value[2] /= 100; 331 | return value; 332 | } 333 | function setFromHsvInput() { 334 | setHsv(...parseHsv(hsvInputEl.value)); 335 | } 336 | hsvInputEl.addEventListener("input", setFromHsvInput); 337 | 338 | function parseHsl(value) { 339 | value = hslInputEl.value.split(",").map((c) => parseInt(c)); 340 | value[1] /= 100; 341 | value[2] /= 100; 342 | return value; 343 | } 344 | function setFromHslInput() { 345 | setHsl(...parseHsl(hslInputEl.value)); 346 | } 347 | hslInputEl.addEventListener("input", setFromHslInput); 348 | 349 | updateColorPreview(); 350 | -------------------------------------------------------------------------------- /src/web/assets/style.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /* body background */ 3 | --bg-1: #0b0e14; 4 | /* background of the content */ 5 | --bg-2: #0d1017; 6 | /* input suggestions background */ 7 | --bg-3: #0f131a; 8 | /* mostly borders */ 9 | --bg-4: #234; 10 | 11 | /* main text color */ 12 | --fg-1: #bfbdb6; 13 | /* search result description */ 14 | --fg-2: #bba; 15 | --fg-3: #998; 16 | 17 | /* focus outline */ 18 | --accent: #e6b450; 19 | 20 | --link: #29e; 21 | --link-visited: #92e; 22 | 23 | /* green, success */ 24 | --positive: #7fd962; 25 | /* red, error */ 26 | --negative: #d95757; 27 | 28 | --syntax-string: #aad94c; 29 | --syntax-special: #e6b673; 30 | --syntax-constant: #d2a6ff; 31 | --syntax-comment: #acb6bf8c; 32 | --syntax-func: #ffb454; 33 | } 34 | 35 | ::selection { 36 | background-color: var(--accent); 37 | color: var(--bg-3); 38 | } 39 | 40 | html { 41 | height: 100%; 42 | } 43 | body { 44 | 
font-family: monospace; 45 | background-color: var(--bg-1); 46 | color: var(--fg-1); 47 | margin: 0; 48 | line-height: 1.2; 49 | height: 100%; 50 | } 51 | 52 | .settings-link, 53 | .settings-link:visited { 54 | position: absolute; 55 | top: 1em; 56 | right: 1em; 57 | color: var(--link); 58 | } 59 | .version-info { 60 | position: absolute; 61 | bottom: 1em; 62 | right: 1em; 63 | } 64 | 65 | .main-container { 66 | /* enough space for the infobox */ 67 | max-width: 73.5rem; 68 | margin: 0 auto; 69 | word-break: break-word; 70 | } 71 | main { 72 | max-width: 40rem; 73 | /* margin: 0 0 0 10rem; */ 74 | padding: 1rem 0.5rem; 75 | background-color: var(--bg-2); 76 | min-height: 100%; 77 | } 78 | .search-images > main { 79 | /* image search uses 100% width */ 80 | max-width: 100%; 81 | } 82 | .main-container.search-images { 83 | max-width: none; 84 | } 85 | @media screen and (max-width: 74rem) { 86 | /* small screens */ 87 | .main-container { 88 | margin: 0 auto; 89 | max-width: 40rem; 90 | } 91 | } 92 | input, 93 | textarea, 94 | select { 95 | font-family: monospace; 96 | background-color: var(--bg-2); 97 | color: var(--fg-1); 98 | border: 1px solid var(--bg-4); 99 | font-size: inherit; 100 | padding: 0.25rem; 101 | } 102 | input:focus-visible { 103 | outline: none; 104 | border-color: var(--accent); 105 | } 106 | :focus-visible { 107 | outline: 1px solid var(--accent); 108 | } 109 | input[type="submit"] { 110 | cursor: pointer; 111 | } 112 | a { 113 | color: var(--link); 114 | text-decoration: none; 115 | } 116 | a:visited { 117 | color: var(--link-visited); 118 | } 119 | pre { 120 | white-space: pre-wrap; 121 | } 122 | blockquote { 123 | margin: 0; 124 | padding-left: 0.5em; 125 | border-left: 0.25em solid var(--bg-4); 126 | } 127 | 128 | /* index page */ 129 | .main-container.index-page { 130 | display: flex; 131 | flex-direction: column; 132 | min-height: 100%; 133 | height: 100%; 134 | justify-content: center; 135 | margin: 0 auto; 136 | padding: 0 0.5em; 137 | 
text-align: center; 138 | max-width: 30em; 139 | } 140 | h1 { 141 | margin-top: 0; 142 | } 143 | 144 | /* settings page */ 145 | .settings-page .back-to-index-button { 146 | bottom: 0.5em; 147 | position: relative; 148 | color: var(--link); 149 | } 150 | .settings-form select { 151 | display: block; 152 | } 153 | #save-settings-button { 154 | margin-top: 1em; 155 | display: block; 156 | } 157 | #custom-css { 158 | tab-size: 2; 159 | width: calc(100% - 0.5rem); 160 | height: 20rem; 161 | border: 0; 162 | outline: 1px solid var(--bg-4); 163 | } 164 | 165 | /* header */ 166 | .search-form { 167 | margin-bottom: 1rem; 168 | display: flex; 169 | gap: 0.5rem; 170 | } 171 | #search-input { 172 | flex: 1; 173 | } 174 | .main-container:not(.index-page) #search-input { 175 | max-width: 30em; 176 | } 177 | #search-input-suggestions { 178 | position: absolute; 179 | text-align: left; 180 | margin-top: calc(1.9em + 1px); 181 | background: var(--bg-3); 182 | padding: 0.1em 0 0.3em 0; 183 | border: 1px solid var(--bg-4); 184 | border-top: transparent; 185 | z-index: 10; 186 | } 187 | .search-input-suggestion { 188 | cursor: pointer; 189 | padding: 0.3em 0.3em; 190 | white-space: nowrap; 191 | } 192 | .search-input-suggestion.focused, 193 | .search-input-suggestion:hover { 194 | background: var(--bg-4); 195 | } 196 | 197 | /* search tabs (like images, if enabled) */ 198 | .search-tabs { 199 | display: flex; 200 | gap: 0.5rem; 201 | margin-bottom: 0.5rem; 202 | margin-top: -0.5rem; 203 | } 204 | .search-tab { 205 | border: 1px solid var(--bg-4); 206 | padding: 0.25rem; 207 | } 208 | a.search-tab { 209 | color: var(--link); 210 | } 211 | 212 | /* search result */ 213 | .search-result { 214 | padding-top: 1rem; 215 | border-top: 1px solid var(--bg-4); 216 | font-size: 1rem; 217 | } 218 | .search-result-anchor { 219 | display: block; 220 | word-break: break-all; 221 | } 222 | .search-result-url { 223 | margin: 0; 224 | font-size: 0.8rem; 225 | color: var(--fg-3); 226 | } 227 | 
.search-result-title { 228 | margin: 0; 229 | font-size: 1rem; 230 | } 231 | .search-result-description { 232 | margin: 0; 233 | font-size: 0.8em; 234 | color: var(--fg-2); 235 | } 236 | 237 | /* engine list */ 238 | .engine-list { 239 | opacity: 0.5; 240 | justify-content: end; 241 | display: flex; 242 | gap: 0.5em; 243 | font-size: 0.8rem; 244 | } 245 | 246 | /* featured snippet */ 247 | .featured-snippet { 248 | margin-bottom: 1rem; 249 | border: 1px solid var(--bg-4); 250 | padding: 0.5rem; 251 | font-size: 1.2rem; 252 | } 253 | .featured-snippet .search-result-description { 254 | margin-bottom: 1rem; 255 | white-space: pre-wrap; 256 | } 257 | 258 | /* progress update */ 259 | .progress-updates { 260 | margin-bottom: 1rem; 261 | border: 1px solid var(--bg-4); 262 | padding: 0.5rem; 263 | min-height: 5em; 264 | } 265 | .progress-update { 266 | margin: 0; 267 | white-space: pre-wrap; 268 | } 269 | .progress-update-time { 270 | opacity: 0.5; 271 | } 272 | .progress-update-done { 273 | color: var(--positive); 274 | font-weight: bold; 275 | } 276 | .progress-update-error { 277 | color: var(--negative); 278 | font-weight: bold; 279 | } 280 | 281 | /* answer */ 282 | .answer { 283 | margin-bottom: 1rem; 284 | border: 1px solid var(--bg-4); 285 | padding: 0.5rem; 286 | word-break: break-word; 287 | } 288 | .answer h3 { 289 | margin: 0; 290 | font-weight: normal; 291 | font-size: 1.2rem; 292 | } 293 | 294 | /* styles that are somewhat answer-specific but get reused across other styles sometimes */ 295 | .answer-query { 296 | margin: 0; 297 | opacity: 0.5; 298 | } 299 | .answer-comment { 300 | color: var(--syntax-comment); 301 | font-weight: normal; 302 | } 303 | 304 | /* styles for specific answers */ 305 | .answer-calc-constant { 306 | color: var(--syntax-constant); 307 | white-space: pre-wrap; 308 | } 309 | .answer-calc-string { 310 | color: var(--syntax-string); 311 | } 312 | .answer-calc-special { 313 | color: var(--syntax-special); 314 | } 315 | .answer-calc-func { 
color: var(--syntax-func);
}

.answer-dictionary-word,
.answer-thesaurus-word {
  margin-top: 0;
}
.answer-dictionary-part-of-speech {
  font-style: italic;
  opacity: 0.8;
}
.answer-dictionary-example {
  margin-bottom: 0.5em;
}

.answer-thesaurus-item:not(:last-child) {
  border-bottom: 1px solid var(--bg-4);
  margin-bottom: 1rem;
  padding-bottom: 1rem;
}
.answer-thesaurus-word-description {
  font-style: italic;
  opacity: 0.8;
}
.answer-thesaurus-part-of-speech {
  font-weight: bold;
}
.answer-thesaurus-as-in {
  font-style: italic;
}
h3.answer-thesaurus-category-title {
  margin-top: 0.5rem;
}
.answer-thesaurus-strongest {
  opacity: 1;
}
.answer-thesaurus-strong {
  opacity: 0.8;
}
.answer-thesaurus-weak {
  opacity: 0.6;
}
.answer-thesaurus-list {
  margin: 0;
  padding: 0;
  display: flex;
  flex-wrap: wrap;
  gap: 0.8em;
  list-style-type: none;
}
.answer-thesaurus-list a {
  text-decoration: underline;
}
.answer-notepad {
  width: calc(100% - 4px);
  height: fit-content;
  /* "show" is not a valid overflow keyword (it was silently ignored);
     "visible" is what browsers were rendering anyway */
  overflow-y: visible;
  background-color: transparent;
  color: white;
  border: none;
  outline: none;
  min-height: 4em;
  font-size: 12px;
  resize: none;
}

.answer-colorpicker-preview-container {
  display: flex;
  height: 228px;
}
.answer-colorpicker-preview {
  width: 204px;
  max-width: 33%;
}
.answer-colorpicker-picker-container {
  position: absolute;
  pointer-events: none;
  width: 100%;
  height: 100%;
}
.answer-colorpicker-picker,
.answer-colorpicker-huepicker {
  position: absolute;
  width: 1rem;
  height: 1rem;
  transform: translate(-0.5rem, -0.5rem);
402 | border-radius: 50%; 403 | border: 2px solid #fff; 404 | 405 | touch-action: none; 406 | } 407 | .answer-colorpicker-canvas-container { 408 | flex: 1; 409 | position: relative; 410 | } 411 | .answer-colorpicker-canvas { 412 | height: 100%; 413 | width: 100%; 414 | 415 | touch-action: none; 416 | } 417 | .answer-colorpicker-slider-container { 418 | margin: 1rem; 419 | position: relative; 420 | height: 1rem; 421 | 422 | touch-action: none; 423 | } 424 | .answer-colorpicker-slider { 425 | height: 100%; 426 | width: 100%; 427 | } 428 | .answer-colorpicker-huepicker { 429 | transform: translate(-0.5rem, -50%); 430 | top: 50%; 431 | } 432 | .answer-colorpicker label { 433 | display: block; 434 | width: fit-content; 435 | } 436 | .answer-colorpicker-hex-input-container { 437 | text-align: center; 438 | margin-bottom: 0.5rem; 439 | } 440 | .answer-colorpicker-hex-input-container label { 441 | margin: 0 auto; 442 | } 443 | #answer-colorpicker-hex-input { 444 | width: 100%; 445 | text-align: center; 446 | } 447 | .answer-colorpicker-other-inputs { 448 | display: flex; 449 | gap: 0.5rem; 450 | } 451 | .answer-colorpicker-input-container { 452 | display: flex; 453 | } 454 | .answer-colorpicker-other-inputs input { 455 | width: 100%; 456 | } 457 | 458 | /* infobox */ 459 | .infobox { 460 | margin-bottom: 1rem; 461 | border: 1px solid var(--bg-4); 462 | padding: 0.5rem; 463 | position: absolute; 464 | top: 3.5rem; 465 | max-width: 30rem; 466 | margin-left: 42rem; 467 | } 468 | @media screen and (max-width: 74rem) { 469 | /* small screens */ 470 | .infobox { 471 | position: static; 472 | margin: 0; 473 | max-width: unset; 474 | margin-bottom: 1rem; 475 | } 476 | 477 | .postsearch-infobox { 478 | /* displaying these properly is too hard so don't */ 479 | display: none; 480 | } 481 | } 482 | .infobox h2 { 483 | margin-top: 0; 484 | margin-bottom: 0.5em; 485 | } 486 | .infobox p { 487 | margin: 0; 488 | } 489 | .infobox pre:not(.infobox-github-readme) { 490 | border: 1px solid 
var(--bg-4); 491 | padding: 0.5rem; 492 | display: block; 493 | } 494 | .infobox pre > code { 495 | font-weight: normal; 496 | } 497 | .infobox code { 498 | font-weight: bold; 499 | } 500 | .infobox img { 501 | max-width: 100%; 502 | } 503 | .infobox-docs_rs-version { 504 | opacity: 0.5; 505 | font-weight: normal; 506 | font-size: 0.8em; 507 | } 508 | .infobox-github-readme .markdown-alert { 509 | padding-left: 0.5em; 510 | border-left: 0.25em solid var(--bg-4); 511 | } 512 | .infobox-github-readme .markdown-alert-title { 513 | font-weight: bold; 514 | } 515 | .postsearch-infobox p { 516 | margin-bottom: 1em; 517 | } 518 | .infobox-minecraft_wiki-article > .notaninfobox { 519 | display: none !important; 520 | } 521 | .noexcerpt, 522 | .navigation-not-searchable { 523 | display: none !important; 524 | } 525 | .mcw-mainpage-icon { 526 | display: inline-block; 527 | } 528 | 529 | /* image results */ 530 | .image-results { 531 | display: flex; 532 | flex-wrap: wrap; 533 | gap: 0.5rem; 534 | } 535 | .image-result { 536 | min-width: 12rem; 537 | position: relative; 538 | flex-grow: 1; 539 | overflow: hidden; 540 | } 541 | .image-result-img-container { 542 | margin: 0 auto; 543 | width: fit-content; 544 | } 545 | .image-result img { 546 | height: 10.3rem; 547 | width: auto; 548 | object-fit: scale-down; 549 | object-position: left; 550 | } 551 | .image-result-page-anchor { 552 | display: block; 553 | height: 2.25em; 554 | } 555 | .image-result-page-url { 556 | overflow: hidden; 557 | text-overflow: ellipsis; 558 | 559 | font-size: 0.8rem; 560 | 561 | white-space: nowrap; 562 | width: 100%; 563 | position: absolute; 564 | display: block; 565 | } 566 | .image-result-title { 567 | overflow: hidden; 568 | text-overflow: ellipsis; 569 | 570 | font-size: 0.85rem; 571 | 572 | white-space: nowrap; 573 | width: 100%; 574 | position: absolute; 575 | display: block; 576 | margin-top: 1em; 577 | } 578 | -------------------------------------------------------------------------------- 
/src/config.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::HashMap, 3 | fs, 4 | net::SocketAddr, 5 | path::Path, 6 | sync::{Arc, LazyLock}, 7 | }; 8 | 9 | use serde::Deserialize; 10 | use tracing::info; 11 | 12 | use crate::engines::Engine; 13 | 14 | impl Default for Config { 15 | fn default() -> Self { 16 | Config { 17 | bind: "0.0.0.0:28019".parse().unwrap(), 18 | api: false, 19 | ui: UiConfig { 20 | show_engine_list_separator: false, 21 | show_version_info: false, 22 | site_name: "metasearch".to_string(), 23 | show_settings_link: true, 24 | stylesheet_url: "".to_string(), 25 | stylesheet_str: "".to_string(), 26 | favicon_url: "".to_string(), 27 | show_autocomplete: true, 28 | }, 29 | image_search: ImageSearchConfig { 30 | enabled: false, 31 | show_engines: true, 32 | proxy: ImageProxyConfig { 33 | enabled: true, 34 | max_download_size: 10_000_000, 35 | }, 36 | }, 37 | engines: Arc::new(EnginesConfig::default()), 38 | urls: UrlsConfig { 39 | replace: vec![( 40 | HostAndPath::new("minecraft.fandom.com/wiki/"), 41 | HostAndPath::new("minecraft.wiki/w/"), 42 | )], 43 | weight: vec![], 44 | }, 45 | } 46 | } 47 | } 48 | 49 | impl Default for EnginesConfig { 50 | fn default() -> Self { 51 | use toml::value::Value; 52 | 53 | let mut map = HashMap::new(); 54 | // engines are enabled by default, so engines that aren't listed here are 55 | // enabled 56 | 57 | // main search engines 58 | map.insert(Engine::Google, EngineConfig::new().with_weight(1.05)); 59 | map.insert(Engine::Bing, EngineConfig::new().with_weight(1.0)); 60 | map.insert(Engine::Brave, EngineConfig::new().with_weight(1.25)); 61 | map.insert( 62 | Engine::Marginalia, 63 | EngineConfig::new().with_weight(0.15).with_extra( 64 | vec![( 65 | "args".to_string(), 66 | Value::Table( 67 | vec![ 68 | ("profile".to_string(), Value::String("corpo".to_string())), 69 | ("js".to_string(), Value::String("default".to_string())), 70 | 
("adtech".to_string(), Value::String("default".to_string())), 71 | ] 72 | .into_iter() 73 | .collect(), 74 | ), 75 | )] 76 | .into_iter() 77 | .collect(), 78 | ), 79 | ); 80 | 81 | // additional search engines 82 | map.insert( 83 | Engine::GoogleScholar, 84 | EngineConfig::new().with_weight(0.50).disabled(), 85 | ); 86 | map.insert( 87 | Engine::RightDao, 88 | EngineConfig::new().with_weight(0.10).disabled(), 89 | ); 90 | map.insert( 91 | Engine::Stract, 92 | EngineConfig::new().with_weight(0.15).disabled(), 93 | ); 94 | map.insert( 95 | Engine::Yep, 96 | EngineConfig::new().with_weight(0.10).disabled(), 97 | ); 98 | 99 | // calculators (give them a high weight so they're always the first thing in 100 | // autocomplete) 101 | map.insert(Engine::Numbat, EngineConfig::new().with_weight(10.0)); 102 | map.insert( 103 | Engine::Fend, 104 | EngineConfig::new().with_weight(10.0).disabled(), 105 | ); 106 | 107 | // other engines 108 | map.insert( 109 | Engine::Mdn, 110 | EngineConfig::new().with_extra( 111 | vec![("max_sections".to_string(), Value::Integer(1))] 112 | .into_iter() 113 | .collect(), 114 | ), 115 | ); 116 | 117 | Self { map } 118 | } 119 | } 120 | 121 | impl Default for EngineConfig { 122 | fn default() -> Self { 123 | Self { 124 | enabled: true, 125 | weight: 1.0, 126 | extra: Default::default(), 127 | } 128 | } 129 | } 130 | static DEFAULT_ENGINE_CONFIG_REF: LazyLock = LazyLock::new(EngineConfig::default); 131 | impl EngineConfig { 132 | pub fn new() -> Self { 133 | Self::default() 134 | } 135 | pub fn with_weight(self, weight: f64) -> Self { 136 | Self { weight, ..self } 137 | } 138 | pub fn disabled(self) -> Self { 139 | Self { 140 | enabled: false, 141 | ..self 142 | } 143 | } 144 | pub fn with_extra(self, extra: toml::Table) -> Self { 145 | Self { extra, ..self } 146 | } 147 | } 148 | 149 | // 150 | 151 | #[derive(Debug, Clone)] 152 | pub struct Config { 153 | pub bind: SocketAddr, 154 | /// Whether the JSON API should be accessible. 
155 | pub api: bool, 156 | pub ui: UiConfig, 157 | pub image_search: ImageSearchConfig, 158 | // wrapped in an arc to make Config cheaper to clone 159 | pub engines: Arc, 160 | pub urls: UrlsConfig, 161 | } 162 | 163 | #[derive(Deserialize, Debug)] 164 | pub struct PartialConfig { 165 | pub bind: Option, 166 | pub api: Option, 167 | pub ui: Option, 168 | pub image_search: Option, 169 | pub engines: Option, 170 | pub urls: Option, 171 | } 172 | 173 | impl Config { 174 | pub fn overlay(&mut self, partial: PartialConfig) { 175 | self.bind = partial.bind.unwrap_or(self.bind); 176 | self.api = partial.api.unwrap_or(self.api); 177 | self.ui.overlay(partial.ui.unwrap_or_default()); 178 | self.image_search 179 | .overlay(partial.image_search.unwrap_or_default()); 180 | if let Some(partial_engines) = partial.engines { 181 | let mut engines = self.engines.as_ref().clone(); 182 | engines.overlay(partial_engines); 183 | self.engines = Arc::new(engines); 184 | } 185 | self.urls.overlay(partial.urls.unwrap_or_default()); 186 | } 187 | } 188 | 189 | #[derive(Debug, Clone)] 190 | pub struct UiConfig { 191 | pub show_engine_list_separator: bool, 192 | pub show_version_info: bool, 193 | /// Settings are always accessible anyways, this just controls whether the 194 | /// link to them in the index page is visible. 
pub show_settings_link: bool,
    pub site_name: String,
    pub show_autocomplete: bool,
    pub stylesheet_url: String,
    pub stylesheet_str: String,
    pub favicon_url: String,
}

#[derive(Deserialize, Debug, Default)]
pub struct PartialUiConfig {
    pub show_engine_list_separator: Option<bool>,
    pub show_version_info: Option<bool>,
    pub show_settings_link: Option<bool>,
    pub show_autocomplete: Option<bool>,

    pub site_name: Option<String>,
    pub stylesheet_url: Option<String>,
    pub stylesheet_str: Option<String>,
    pub favicon_url: Option<String>,
}

impl UiConfig {
    /// Applies every field that is present in `partial` on top of `self`.
    pub fn overlay(&mut self, partial: PartialUiConfig) {
        self.show_engine_list_separator = partial
            .show_engine_list_separator
            .unwrap_or(self.show_engine_list_separator);
        self.show_version_info = partial.show_version_info.unwrap_or(self.show_version_info);
        self.show_settings_link = partial
            .show_settings_link
            .unwrap_or(self.show_settings_link);
        self.show_autocomplete = partial.show_autocomplete.unwrap_or(self.show_autocomplete);
        // assign only when present instead of `unwrap_or(self.x.clone())`,
        // which cloned the existing strings even when they were kept
        if let Some(site_name) = partial.site_name {
            self.site_name = site_name;
        }
        if let Some(stylesheet_url) = partial.stylesheet_url {
            self.stylesheet_url = stylesheet_url;
        }
        if let Some(stylesheet_str) = partial.stylesheet_str {
            self.stylesheet_str = stylesheet_str;
        }
        if let Some(favicon_url) = partial.favicon_url {
            self.favicon_url = favicon_url;
        }
    }
}

#[derive(Debug, Clone)]
pub struct ImageSearchConfig {
    pub enabled: bool,
    pub show_engines: bool,
    pub proxy: ImageProxyConfig,
}

#[derive(Deserialize, Debug, Default)]
pub struct PartialImageSearchConfig {
    pub enabled: Option<bool>,
    pub show_engines: Option<bool>,
    pub proxy: Option<PartialImageProxyConfig>,
}

impl ImageSearchConfig {
    /// Applies every field that is present in `partial` on top of `self`.
    pub fn overlay(&mut self, partial: PartialImageSearchConfig) {
        self.enabled =
partial.enabled.unwrap_or(self.enabled);
        self.show_engines = partial.show_engines.unwrap_or(self.show_engines);
        self.proxy.overlay(partial.proxy.unwrap_or_default());
    }
}

#[derive(Debug, Clone)]
pub struct ImageProxyConfig {
    /// Whether we should proxy remote images through our server. This is mostly
    /// a privacy feature.
    pub enabled: bool,
    /// The maximum size of an image that can be proxied. This is in bytes.
    pub max_download_size: u64,
}

#[derive(Deserialize, Debug, Default)]
pub struct PartialImageProxyConfig {
    pub enabled: Option<bool>,
    pub max_download_size: Option<u64>,
}

impl ImageProxyConfig {
    /// Applies every field that is present in `partial` on top of `self`.
    pub fn overlay(&mut self, partial: PartialImageProxyConfig) {
        self.enabled = partial.enabled.unwrap_or(self.enabled);
        self.max_download_size = partial.max_download_size.unwrap_or(self.max_download_size);
    }
}

#[derive(Debug, Clone)]
pub struct EnginesConfig {
    pub map: HashMap<Engine, EngineConfig>,
}

#[derive(Deserialize, Debug, Default)]
pub struct PartialEnginesConfig {
    #[serde(flatten)]
    pub map: HashMap<Engine, PartialDefaultableEngineConfig>,
}

/// An engine entry in the config file is either a bare `true`/`false` toggle
/// or a full per-engine table.
#[derive(Deserialize, Clone, Debug)]
#[serde(untagged)]
pub enum PartialDefaultableEngineConfig {
    Boolean(bool),
    Full(PartialEngineConfig),
}

impl EnginesConfig {
    /// Applies every engine entry in `partial` on top of `self`.
    pub fn overlay(&mut self, partial: PartialEnginesConfig) {
        for (key, value) in partial.map {
            let full = match value {
                // a bare boolean only toggles `enabled`
                PartialDefaultableEngineConfig::Boolean(enabled) => PartialEngineConfig {
                    enabled: Some(enabled),
                    ..Default::default()
                },
                PartialDefaultableEngineConfig::Full(full) => full,
            };
            // entry() does one lookup instead of the previous get_mut + insert
            self.map
                .entry(key)
                .or_insert_with(EngineConfig::default)
                .overlay(full);
        }
317 | } 318 | 319 | pub fn get(&self, engine: Engine) -> &EngineConfig { 320 | self.map.get(&engine).unwrap_or(&DEFAULT_ENGINE_CONFIG_REF) 321 | } 322 | } 323 | 324 | #[derive(Debug, Clone)] 325 | pub struct EngineConfig { 326 | pub enabled: bool, 327 | /// The priority of this engine relative to the other engines. 328 | pub weight: f64, 329 | /// Per-engine configs. These are parsed at request time. 330 | pub extra: toml::Table, 331 | } 332 | 333 | #[derive(Deserialize, Clone, Debug, Default)] 334 | pub struct PartialEngineConfig { 335 | pub enabled: Option, 336 | pub weight: Option, 337 | #[serde(flatten)] 338 | pub extra: toml::Table, 339 | } 340 | 341 | impl EngineConfig { 342 | pub fn overlay(&mut self, partial: PartialEngineConfig) { 343 | self.enabled = partial.enabled.unwrap_or(self.enabled); 344 | self.weight = partial.weight.unwrap_or(self.weight); 345 | self.extra.extend(partial.extra); 346 | } 347 | } 348 | 349 | impl Config { 350 | pub fn read_or_create(config_path: &Path) -> eyre::Result { 351 | let mut config = Config::default(); 352 | 353 | if !config_path.exists() { 354 | info!("No config found, creating one at {config_path:?}"); 355 | let default_config_str = include_str!("../config-default.toml"); 356 | if let Some(parent_path) = config_path.parent() { 357 | let _ = fs::create_dir_all(parent_path); 358 | } 359 | fs::write(config_path, default_config_str)?; 360 | } 361 | 362 | let given_config = toml::from_str::(&fs::read_to_string(config_path)?)?; 363 | config.overlay(given_config); 364 | Ok(config) 365 | } 366 | } 367 | 368 | #[derive(Debug, Clone, PartialEq)] 369 | pub struct HostAndPath { 370 | pub host: String, 371 | pub path: String, 372 | } 373 | impl HostAndPath { 374 | pub fn new(s: &str) -> Self { 375 | let (host, path) = s.split_once('/').unwrap_or((s, "")); 376 | Self { 377 | host: host.to_owned(), 378 | path: path.to_owned(), 379 | } 380 | } 381 | } 382 | 383 | #[derive(Debug, Clone)] 384 | pub struct UrlsConfig { 385 | pub replace: 
Vec<(HostAndPath, HostAndPath)>,
    pub weight: Vec<(HostAndPath, f64)>,
}
#[derive(Deserialize, Debug, Default)]
pub struct PartialUrlsConfig {
    #[serde(default)]
    pub replace: HashMap<String, String>,
    #[serde(default)]
    pub weight: HashMap<String, f64>,
}
impl UrlsConfig {
    /// Applies the replacement and weight rules in `partial` on top of `self`.
    pub fn overlay(&mut self, partial: PartialUrlsConfig) {
        for (from, to) in partial.replace {
            let from = HostAndPath::new(&from);
            if to.is_empty() {
                // setting the value to an empty string removes it; if the rule
                // isn't present there's nothing to remove (the previous
                // `index.unwrap()` panicked in that case)
                if let Some(index) = self.replace.iter().position(|(u, _)| u == &from) {
                    // swap_remove is fine because the order of this vec doesn't matter
                    self.replace.swap_remove(index);
                }
            } else {
                let to = HostAndPath::new(&to);
                self.replace.push((from, to));
            }
        }

        for (url, weight) in partial.weight {
            let url = HostAndPath::new(&url);
            self.weight.push((url, weight));
        }

        // sort by length so that more specific checks are done first
        self.weight.sort_by(|(a, _), (b, _)| {
            let a_len = a.path.len() + a.host.len();
            let b_len = b.path.len() + b.host.len();
            b_len.cmp(&a_len)
        });
    }
}
--------------------------------------------------------------------------------