├── .envrc ├── rustfmt.toml ├── src ├── web │ ├── assets │ │ ├── robots.txt │ │ ├── README │ │ ├── themes │ │ │ ├── README │ │ │ ├── catppuccin-macchiato.css │ │ │ ├── nord-bluish.css │ │ │ ├── discord.css │ │ │ ├── catppuccin-latte.css │ │ │ └── catppuccin-mocha.css │ │ ├── script.js │ │ ├── scripts │ │ │ └── colorpicker.js │ │ └── style.css │ ├── autocomplete.rs │ ├── opensearch.rs │ ├── search │ │ ├── images.rs │ │ └── all.rs │ ├── index.rs │ ├── image_proxy.rs │ ├── settings.rs │ ├── mod.rs │ └── search.rs ├── engines │ ├── search.rs │ ├── postsearch.rs │ ├── answer │ │ ├── ip.rs │ │ ├── notepad.rs │ │ ├── useragent.rs │ │ ├── wikipedia.rs │ │ ├── dictionary.rs │ │ ├── timezone.rs │ │ ├── fend.rs │ │ ├── numbat.rs │ │ └── thesaurus.rs │ ├── answer.rs │ ├── search │ │ ├── rightdao.rs │ │ ├── brave.rs │ │ ├── google_scholar.rs │ │ ├── stract.rs │ │ ├── yep.rs │ │ ├── marginalia.rs │ │ ├── bing.rs │ │ └── google.rs │ ├── postsearch │ │ ├── minecraft_wiki.rs │ │ ├── stackexchange.rs │ │ ├── docs_rs.rs │ │ ├── mdn.rs │ │ └── github.rs │ ├── macros.rs │ └── ranking.rs ├── build.rs ├── main.rs ├── parse.rs ├── urls.rs └── config.rs ├── .prettierrc ├── .gitignore ├── compose.yml ├── Containerfile ├── config-default.toml ├── Cargo.toml ├── flake.lock ├── module.nix ├── flake.nix ├── README └── LICENSE /.envrc: -------------------------------------------------------------------------------- 1 | use flake 2 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | wrap_comments = true -------------------------------------------------------------------------------- /src/web/assets/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /.prettierrc: 
-------------------------------------------------------------------------------- 1 | { 2 | "tabWidth": 2, 3 | "useTabs": false 4 | } 5 | -------------------------------------------------------------------------------- /src/engines/search.rs: -------------------------------------------------------------------------------- 1 | pub mod bing; 2 | pub mod brave; 3 | pub mod google; 4 | pub mod google_scholar; 5 | pub mod marginalia; 6 | pub mod rightdao; 7 | pub mod stract; 8 | pub mod yep; 9 | -------------------------------------------------------------------------------- /src/web/assets/README: -------------------------------------------------------------------------------- 1 | files added in this directory aren't automatically made accessible, you have to add them as a route 2 | in src/web/mod.rs (so the files are included in the binary) 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /config.toml 3 | 4 | # convenience script i use for deploying the site to my server, feel free to 5 | # write your own here too 6 | /deploy.sh 7 | 8 | # direnv (mostly used for Nix) 9 | .direnv/ 10 | -------------------------------------------------------------------------------- /compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.7' 2 | 3 | services: 4 | metasearch2: 5 | build: 6 | args: 7 | CONFIG: "/usr/local/bin/config.toml" 8 | context: . 
9 | dockerfile: Containerfile 10 | network_mode: "host" 11 | restart: unless-stopped -------------------------------------------------------------------------------- /src/web/assets/themes/README: -------------------------------------------------------------------------------- 1 | How to add new themes: 2 | 3 | - Copy a similar theme in this directory and rename it to your theme's name and modify it as necessary 4 | - Add the route at `src/web/mod.rs` 5 | - Add it as an option in settings at `src/web/settings.rs` 6 | -------------------------------------------------------------------------------- /src/engines/postsearch.rs: -------------------------------------------------------------------------------- 1 | //! These search engines are requested after we've built the main search 2 | //! results. They can only show stuff in infoboxes and don't get requested if 3 | //! an infobox was added by another earlier engine. 4 | 5 | pub mod docs_rs; 6 | pub mod github; 7 | pub mod mdn; 8 | pub mod minecraft_wiki; 9 | pub mod stackexchange; 10 | -------------------------------------------------------------------------------- /src/engines/answer/ip.rs: -------------------------------------------------------------------------------- 1 | use maud::html; 2 | 3 | use crate::engines::{EngineResponse, SearchQuery}; 4 | 5 | use super::regex; 6 | 7 | pub async fn request(query: &SearchQuery) -> EngineResponse { 8 | if !regex!("^what('s|s| is) my ip").is_match(&query.query.to_lowercase()) { 9 | return EngineResponse::new(); 10 | } 11 | 12 | let ip = &query.ip; 13 | 14 | EngineResponse::answer_html(html! 
{ 15 | h3 { b { (ip) } } 16 | }) 17 | } 18 | -------------------------------------------------------------------------------- /src/engines/answer.rs: -------------------------------------------------------------------------------- 1 | pub mod colorpicker; 2 | pub mod dictionary; 3 | pub mod fend; 4 | pub mod ip; 5 | pub mod notepad; 6 | pub mod numbat; 7 | pub mod thesaurus; 8 | pub mod timezone; 9 | pub mod useragent; 10 | pub mod wikipedia; 11 | 12 | macro_rules! regex { 13 | ($re:literal $(,)?) => {{ 14 | static RE: std::sync::LazyLock = 15 | std::sync::LazyLock::new(|| regex::Regex::new($re).unwrap()); 16 | &RE 17 | }}; 18 | } 19 | pub(crate) use regex; 20 | -------------------------------------------------------------------------------- /Containerfile: -------------------------------------------------------------------------------- 1 | FROM lukemathwalker/cargo-chef:latest-rust-1.91-alpine as chef 2 | WORKDIR /app 3 | 4 | FROM chef AS planner 5 | COPY . . 6 | RUN cargo chef prepare --recipe-path recipe.json 7 | 8 | FROM chef AS builder 9 | COPY --from=planner /app/recipe.json recipe.json 10 | RUN cargo chef cook --release --recipe-path recipe.json 11 | COPY . . 
12 | RUN cargo build --release 13 | 14 | FROM alpine:latest AS runtime 15 | WORKDIR /app 16 | COPY --from=builder /app/config.toml /usr/local/bin/config.toml 17 | COPY --from=builder /app/target/release/metasearch /usr/local/bin/metasearch 18 | ARG CONFIG 19 | ENV CONFIG=${CONFIG} 20 | EXPOSE 28019 21 | ENTRYPOINT /usr/local/bin/metasearch $CONFIG 22 | -------------------------------------------------------------------------------- /src/engines/search/rightdao.rs: -------------------------------------------------------------------------------- 1 | use url::Url; 2 | 3 | use crate::{ 4 | engines::{EngineResponse, RequestResponse, CLIENT}, 5 | parse::{parse_html_response_with_opts, ParseOpts}, 6 | }; 7 | 8 | pub async fn request(query: &str) -> RequestResponse { 9 | CLIENT 10 | .get(Url::parse_with_params("https://rightdao.com/search", &[("q", query)]).unwrap()) 11 | .into() 12 | } 13 | 14 | pub fn parse_response(body: &str) -> eyre::Result { 15 | parse_html_response_with_opts( 16 | body, 17 | ParseOpts::new() 18 | .result("div.item") 19 | .title("div.title") 20 | .href("a[href]") 21 | .description("div.description"), 22 | ) 23 | } 24 | -------------------------------------------------------------------------------- /src/engines/answer/notepad.rs: -------------------------------------------------------------------------------- 1 | use maud::html; 2 | 3 | use crate::engines::{EngineResponse, SearchQuery}; 4 | 5 | use super::regex; 6 | 7 | pub async fn request(query: &SearchQuery) -> EngineResponse { 8 | if !regex!("^(note|text|code) ?(pad|book|edit(or|er)?)$").is_match(&query.query.to_lowercase()) 9 | { 10 | return EngineResponse::new(); 11 | } 12 | 13 | // This allows pasting styles which is undesired behavior, and the 14 | // `contenteditable="plaintext-only"` attribute currently only works on Chrome. 15 | // This should be updated when the attribute becomes available in more browsers 16 | EngineResponse::answer_html(html! 
{ 17 | div.answer-notepad contenteditable {} 18 | }) 19 | } 20 | -------------------------------------------------------------------------------- /src/engines/search/brave.rs: -------------------------------------------------------------------------------- 1 | use url::Url; 2 | 3 | use crate::{ 4 | engines::{EngineResponse, RequestResponse, CLIENT}, 5 | parse::{parse_html_response_with_opts, ParseOpts}, 6 | }; 7 | 8 | pub async fn request(query: &str) -> RequestResponse { 9 | CLIENT 10 | .get(Url::parse_with_params("https://search.brave.com/search", &[("q", query)]).unwrap()) 11 | .into() 12 | } 13 | 14 | pub fn parse_response(body: &str) -> eyre::Result { 15 | parse_html_response_with_opts( 16 | body, 17 | ParseOpts::new() 18 | .result("#results > .snippet[data-pos]:not(.standalone)") 19 | .title(".title") 20 | .href("a") 21 | .description(".generic-snippet, .video-snippet > .snippet-description"), 22 | ) 23 | } 24 | -------------------------------------------------------------------------------- /src/web/assets/themes/catppuccin-macchiato.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --bg-1: #24273a; /* background */ 3 | --bg-2: #1e2030; /* mantle */ 4 | --bg-3: #181926; /* crust */ 5 | --bg-4: #363a4e; /* surface 0 */ 6 | --fg-1: #cad3f5; /* text */ 7 | --fg-2: #b8c0e0; /* subtext 1 */ 8 | --fg-2: #a5adcb; /* subtext 0 */ 9 | --accent: #c6a0f6; /* mauve */ 10 | --positive: #a6da95; /* green */ 11 | --negative: #ed8796; /* red */ 12 | 13 | --syntax-string: #a6da95; /* green (string) */ 14 | --syntax-special: #c69ff5; /* mauve (keyword) */ 15 | --syntax-constant: #f5a97f; /* peach (constant/number) */ 16 | --syntax-comment: #929ab7; /* overlay 2 (comment) */ 17 | --syntax-func: #8aadf3; /* blue (method/function) */ 18 | 19 | --link: var(--accent); 20 | --link-visited: var(--link); 21 | } 22 | -------------------------------------------------------------------------------- /src/build.rs: 
-------------------------------------------------------------------------------- 1 | use std::process::Command; 2 | 3 | fn main() { 4 | println!("cargo:rustc-rerun-if-changed=.git/HEAD"); 5 | let output = Command::new("git").args(["rev-parse", "HEAD"]).output(); 6 | let git_hash = match output { 7 | Ok(output) => String::from_utf8(output.stdout).unwrap_or("unknown".into()), 8 | Err(_) => "unknown".into(), 9 | }; 10 | let output = Command::new("git") 11 | .args(["rev-parse", "--short", "HEAD"]) 12 | .output(); 13 | let git_hash_short = match output { 14 | Ok(output) => String::from_utf8(output.stdout).unwrap_or("unknown".into()), 15 | Err(_) => "unknown".into(), 16 | }; 17 | println!("cargo:rustc-env=GIT_HASH={git_hash}"); 18 | println!("cargo:rustc-env=GIT_HASH_SHORT={git_hash_short}"); 19 | } 20 | -------------------------------------------------------------------------------- /src/web/assets/themes/nord-bluish.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /* body background */ 3 | --bg-1: #2e3440; 4 | /* background of the content */ 5 | --bg-2: #3b4252; 6 | /* input suggestions background */ 7 | --bg-3: #434c5e; 8 | /* mostly borders */ 9 | --bg-4: #5e81ac; 10 | 11 | /* main text color */ 12 | --fg-1: #eceff4; 13 | /* search result description */ 14 | --fg-2: #e5e9f0; 15 | --fg-3: #d8dee9; 16 | 17 | /* focus outline */ 18 | --accent: #88c0d0; 19 | 20 | --link: #88c0d0; 21 | --link-visited: #5e81ac; 22 | 23 | /* green */ 24 | --positive: #a3be8c; 25 | /* red, error */ 26 | --negative: #bf616a; 27 | 28 | --syntax-string: #a3be8c; 29 | --syntax-special: #81a1c1; 30 | --syntax-constant: #d8dee9; 31 | --syntax-comment: #616e88; 32 | --syntax-func: #88c0d0; 33 | } 34 | -------------------------------------------------------------------------------- /src/web/assets/themes/discord.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /* body background */ 3 | --bg-1: 
#313338; 4 | /* background of the content */ 5 | --bg-2: #2b2d31; 6 | /* input suggestions background */ 7 | --bg-3: #1e1f22; 8 | /* mostly borders */ 9 | --bg-4: transparent; 10 | 11 | /* main text color */ 12 | --fg-1: #dbdee1; 13 | /* search result description */ 14 | --fg-2: #b5bac1; 15 | --fg-3: #949ba4; 16 | 17 | /* focus outline */ 18 | --accent: #41434a; 19 | 20 | --link: #00a8fc; 21 | --link-visited: #2eb6ff; 22 | 23 | /* green, success */ 24 | --positive: #23a559; 25 | /* red, error */ 26 | --negative: #fa777c; 27 | 28 | --syntax-string: #aad94c; 29 | --syntax-special: #e6b673; 30 | --syntax-constant: #d2a6ff; 31 | --syntax-comment: #acb6bf8c; 32 | --syntax-func: #ffb454; 33 | } 34 | -------------------------------------------------------------------------------- /src/web/assets/themes/catppuccin-latte.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /* body background */ 3 | --bg-1: #dce0e8; 4 | /* background of the content */ 5 | --bg-2: #e6e9ef; 6 | /* input suggestions background */ 7 | --bg-3: #eff1f5; 8 | /* mostly borders */ 9 | --bg-4: #9ca0b0; 10 | 11 | /* main text color */ 12 | --fg-1: #4c4f69; 13 | /* search result description */ 14 | --fg-2: #4c4f69; 15 | --fg-3: #5c5f77; 16 | 17 | /* focus outline */ 18 | --accent: #7287fd; 19 | 20 | --link: #1e66f5; 21 | --link-visited: #8839ef; 22 | 23 | /* green, success */ 24 | --positive: #40a02b; 25 | /* red, error */ 26 | --negative: #e64553; 27 | 28 | --syntax-string: #40a02b; 29 | --syntax-special: #dc8a78; 30 | --syntax-constant: #ea76cb; 31 | --syntax-comment: #5c5f77; 32 | --syntax-func: #7287fd; 33 | } 34 | -------------------------------------------------------------------------------- /src/web/assets/themes/catppuccin-mocha.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /* body background */ 3 | --bg-1: #11111b; 4 | /* background of the content */ 5 | --bg-2: #181825; 6 | /* input 
suggestions background */ 7 | --bg-3: #1e1e2e; 8 | /* mostly borders */ 9 | --bg-4: #313244; 10 | 11 | /* main text color */ 12 | --fg-1: #cdd6f4; 13 | /* search result description */ 14 | --fg-2: #bac2de; 15 | --fg-3: #a6adc8; 16 | 17 | /* focus outline */ 18 | --accent: #b4befe; 19 | 20 | --link: #89b4fa; 21 | --link-visited: #bc78f8; 22 | 23 | /* green, success */ 24 | --positive: #7fd962; 25 | /* red, error */ 26 | --negative: #f38ba8; 27 | 28 | --syntax-string: #a6e3a1; 29 | --syntax-special: #fab387; 30 | --syntax-constant: #cba6f7; 31 | --syntax-comment: #6c7086; 32 | --syntax-func: #89b4fa; 33 | } 34 | -------------------------------------------------------------------------------- /src/web/autocomplete.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use axum::{extract::Query, http::StatusCode, response::IntoResponse, Extension, Json}; 4 | use tracing::error; 5 | 6 | use crate::{config::Config, engines}; 7 | 8 | pub async fn route( 9 | Query(params): Query>, 10 | Extension(config): Extension, 11 | ) -> impl IntoResponse { 12 | let query = params 13 | .get("q") 14 | .cloned() 15 | .unwrap_or_default() 16 | .replace('\n', " "); 17 | 18 | let res = match engines::autocomplete(&config, &query).await { 19 | Ok(res) => res, 20 | Err(err) => { 21 | error!("Autocomplete error for {query}: {err}"); 22 | return (StatusCode::INTERNAL_SERVER_ERROR, Json((query, vec![]))); 23 | } 24 | }; 25 | 26 | (StatusCode::OK, Json((query, res))) 27 | } 28 | -------------------------------------------------------------------------------- /src/engines/search/google_scholar.rs: -------------------------------------------------------------------------------- 1 | use url::Url; 2 | 3 | use crate::{ 4 | engines::{EngineResponse, RequestResponse, CLIENT}, 5 | parse::{parse_html_response_with_opts, ParseOpts}, 6 | }; 7 | 8 | pub async fn request(query: &str) -> RequestResponse { 9 | CLIENT 10 | .get( 11 | 
Url::parse_with_params( 12 | "https://scholar.google.com/scholar", 13 | &[("hl", "en"), ("as_sdt", "0,5"), ("q", query), ("btnG", "")], 14 | ) 15 | .unwrap(), 16 | ) 17 | .into() 18 | } 19 | 20 | pub fn parse_response(body: &str) -> eyre::Result { 21 | parse_html_response_with_opts( 22 | body, 23 | ParseOpts::new() 24 | .result("div.gs_r") 25 | .title("h3") 26 | .href("h3 > a[href]") 27 | .description("div.gs_rs"), 28 | ) 29 | } 30 | -------------------------------------------------------------------------------- /config-default.toml: -------------------------------------------------------------------------------- 1 | # See src/config.rs for all of the possible options. 2 | # The commented-out lines are examples of values you could set, not the defaults. 3 | 4 | bind = "0.0.0.0:28019" 5 | api = false 6 | 7 | [ui] 8 | # engine_list_separator = true 9 | # show_version_info = true 10 | # stylesheet_url = "/themes/catppuccin-mocha.css" 11 | # favicon_url = "data:image/svg+xml;base64,PHN2ZyB2aWV3Qm94PSIwIDAgMzIgMzIiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PGNpcmNsZSBjeD0iMTYiIGN5PSIxNiIgcj0iMTEiLz48L3N2Zz4=" 12 | 13 | [image_search] 14 | # enabled = true 15 | 16 | [engines] 17 | # numbat = false 18 | # fend = true 19 | 20 | [urls.replace] 21 | # "www.reddit.com" = "old.reddit.com" 22 | # "medium.com" = "scribe.rip" 23 | # ".medium.com" = "scribe.rip" 24 | 25 | [urls.weight] 26 | # These are checked after applying replacements. Setting the weight to 0 (or any 27 | # negative number) completely hides the result. Longer matches have a higher 28 | # priority. 
29 | # "quora.com" = 0.1 30 | # ".quora.com" = 0.1 31 | -------------------------------------------------------------------------------- /src/web/opensearch.rs: -------------------------------------------------------------------------------- 1 | use axum::{ 2 | http::{header, HeaderMap}, 3 | response::IntoResponse, 4 | }; 5 | use maud::{html, PreEscaped}; 6 | 7 | pub async fn route(headers: HeaderMap) -> impl IntoResponse { 8 | let host = headers 9 | .get("host") 10 | .and_then(|host| host.to_str().ok()) 11 | .unwrap_or("localhost"); 12 | 13 | ( 14 | [( 15 | header::CONTENT_TYPE, 16 | "application/opensearchdescription+xml", 17 | )], 18 | html! { 19 | (PreEscaped(r#""#)) 20 | OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" { 21 | ShortName { "metasearch" } 22 | Description { "Search metasearch" } 23 | InputEncoding { "UTF-8" } 24 | Url type="text/html" method="get" template=(format!("https://{host}/search?q={{searchTerms}}")) {} 25 | Url type="application/x-suggestions+json" method="get" template=(format!("https://{host}/autocomplete?q={{searchTerms}}")) {} 26 | } 27 | }.into_string(), 28 | ) 29 | } 30 | -------------------------------------------------------------------------------- /src/engines/answer/useragent.rs: -------------------------------------------------------------------------------- 1 | use maud::html; 2 | 3 | use crate::engines::{EngineResponse, SearchQuery}; 4 | 5 | use super::regex; 6 | 7 | pub async fn request(query: &SearchQuery) -> EngineResponse { 8 | if !regex!("^(what('s|s| is) my (user ?agent|ua)|ua|user ?agent)$") 9 | .is_match(&query.query.to_lowercase()) 10 | { 11 | return EngineResponse::new(); 12 | } 13 | 14 | let user_agent = query.request_headers.get("user-agent"); 15 | 16 | let all_headers_html = html! 
{ 17 | br; 18 | details { 19 | summary { "All headers" } 20 | @for (header, value) in query.request_headers.iter() { 21 | div { 22 | b { (header) } ": " (value) 23 | } 24 | } 25 | } 26 | }; 27 | 28 | EngineResponse::answer_html(if let Some(user_agent) = user_agent { 29 | html! { 30 | h3 { b { (user_agent) } } 31 | (all_headers_html) 32 | } 33 | } else { 34 | html! { 35 | "You don't have a user agent" 36 | (all_headers_html) 37 | } 38 | }) 39 | } 40 | -------------------------------------------------------------------------------- /src/engines/search/stract.rs: -------------------------------------------------------------------------------- 1 | use url::Url; 2 | 3 | use crate::{ 4 | engines::{EngineResponse, RequestResponse, CLIENT}, 5 | parse::{parse_html_response_with_opts, ParseOpts}, 6 | }; 7 | 8 | pub async fn request(query: &str) -> RequestResponse { 9 | CLIENT 10 | .get( 11 | Url::parse_with_params( 12 | "https://stract.com/search", 13 | &[ 14 | ("ss", "false"), 15 | // this is not a tracking parameter or token 16 | // this is stract's default value for the search rankings parameter 17 | ("sr", "N4IgNglg1gpgJiAXAbQLoBoRwgZ0rBFDEAIzAHsBjApNAXyA"), 18 | ("q", query), 19 | ("optic", ""), 20 | ], 21 | ) 22 | .unwrap(), 23 | ) 24 | .into() 25 | } 26 | 27 | pub fn parse_response(body: &str) -> eyre::Result { 28 | parse_html_response_with_opts( 29 | body, 30 | ParseOpts::new() 31 | .result("div.grid.w-full.grid-cols-1.space-y-10.place-self-start > div > div.flex.min-w-0.grow.flex-col") 32 | .title("a[title]") 33 | .href("a[href]") 34 | .description("#snippet-text"), 35 | ) 36 | } 37 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "metasearch" 3 | version = "0.2.4" 4 | edition = "2021" 5 | build = "src/build.rs" 6 | description = "a cute metasearch engine" 7 | license = "CC0-1.0" 8 | repository = 
"https://github.com/mat-1/metasearch2" 9 | 10 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 11 | 12 | [dependencies] 13 | ammonia = "4.1.2" 14 | async-stream = "0.3.6" 15 | axum = { version = "0.8.7", default-features = false, features = [ 16 | "tokio", 17 | "http1", 18 | "http2", 19 | "query", 20 | "json", 21 | "form", 22 | ] } 23 | axum-extra = { version = "0.12.2", features = ["cookie"] } 24 | base64 = "0.22.1" 25 | bytes = "1.11.0" 26 | chrono = "0.4.42" 27 | chrono-tz = { version = "0.10.4", features = ["case-insensitive"] } 28 | eyre = "0.6.12" 29 | fend-core = "1.5.7" 30 | futures = "0.3.31" 31 | html-escape = "0.2.13" 32 | maud = "0.27.0" 33 | numbat = "1.16.0" 34 | parking_lot = "0.12.5" 35 | rand = "0.9.2" 36 | regex = "1.12.2" 37 | scraper = "0.24.0" 38 | serde = { version = "1.0.228", features = ["derive"] } 39 | # preserve_order is needed for google images. yippee! 40 | serde_json = { version = "1.0.145", features = ["preserve_order"] } 41 | tokio = { version = "1.48.0", features = ["rt", "macros"] } 42 | tokio-stream = "0.1.17" 43 | toml = { version = "0.9.8", default-features = false, features = [ 44 | "std", 45 | "parse", 46 | "serde", 47 | ] } 48 | tower = "0.5.2" 49 | tower-http = "0.6.6" 50 | tracing = "0.1.41" 51 | tracing-subscriber = "0.3.20" 52 | url = "2.5.7" 53 | urlencoding = "2.1.3" 54 | wreq = { version = "5.3.0", features = [ 55 | "brotli", 56 | "cookies", 57 | "deflate", 58 | "gzip", 59 | "zstd", 60 | ] } 61 | wreq-util = "2.2.6" 62 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "crane": { 4 | "locked": { 5 | "lastModified": 1729273024, 6 | "narHash": "sha256-Mb5SemVsootkn4Q2IiY0rr9vrXdCCpQ9HnZeD/J3uXs=", 7 | "owner": "ipetkov", 8 | "repo": "crane", 9 | "rev": "fa8b7445ddadc37850ed222718ca86622be01967", 10 | "type": "github" 11 | 
}, 12 | "original": { 13 | "owner": "ipetkov", 14 | "repo": "crane", 15 | "type": "github" 16 | } 17 | }, 18 | "flake-parts": { 19 | "inputs": { 20 | "nixpkgs-lib": [ 21 | "nixpkgs" 22 | ] 23 | }, 24 | "locked": { 25 | "lastModified": 1733312601, 26 | "narHash": "sha256-4pDvzqnegAfRkPwO3wmwBhVi/Sye1mzps0zHWYnP88c=", 27 | "owner": "hercules-ci", 28 | "repo": "flake-parts", 29 | "rev": "205b12d8b7cd4802fbcb8e8ef6a0f1408781a4f9", 30 | "type": "github" 31 | }, 32 | "original": { 33 | "owner": "hercules-ci", 34 | "repo": "flake-parts", 35 | "type": "github" 36 | } 37 | }, 38 | "nixpkgs": { 39 | "locked": { 40 | "lastModified": 1729265718, 41 | "narHash": "sha256-4HQI+6LsO3kpWTYuVGIzhJs1cetFcwT7quWCk/6rqeo=", 42 | "owner": "NixOS", 43 | "repo": "nixpkgs", 44 | "rev": "ccc0c2126893dd20963580b6478d1a10a4512185", 45 | "type": "github" 46 | }, 47 | "original": { 48 | "owner": "NixOS", 49 | "ref": "nixpkgs-unstable", 50 | "repo": "nixpkgs", 51 | "type": "github" 52 | } 53 | }, 54 | "root": { 55 | "inputs": { 56 | "crane": "crane", 57 | "flake-parts": "flake-parts", 58 | "nixpkgs": "nixpkgs" 59 | } 60 | } 61 | }, 62 | "root": "root", 63 | "version": 7 64 | } 65 | -------------------------------------------------------------------------------- /src/engines/postsearch/minecraft_wiki.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | use scraper::{Html, Selector}; 3 | 4 | use crate::engines::{HttpResponse, Response, CLIENT}; 5 | 6 | pub async fn request(response: &Response) -> Option { 7 | for search_result in response.search_results.iter().take(8) { 8 | if search_result 9 | .result 10 | .url 11 | .starts_with("https://minecraft.wiki/w/") 12 | { 13 | return Some(CLIENT.get(search_result.result.url.as_str())); 14 | } 15 | } 16 | 17 | None 18 | } 19 | 20 | pub fn parse_response(HttpResponse { res, body, .. 
}: &HttpResponse) -> Option> { 21 | let url = res.url().clone(); 22 | 23 | let dom = Html::parse_document(body); 24 | 25 | let page_title = dom 26 | .select(&Selector::parse("#firstHeading").unwrap()) 27 | .next()? 28 | .text() 29 | .collect::() 30 | .trim() 31 | .to_string(); 32 | 33 | let doc_query = Selector::parse(".mw-parser-output > p").unwrap(); 34 | 35 | let doc_html = dom 36 | .select(&doc_query) 37 | .next() 38 | .map(|doc| doc.html()) 39 | .unwrap_or_default(); 40 | 41 | let doc_html = ammonia::Builder::default() 42 | .link_rel(None) 43 | .add_allowed_classes("div", ["notaninfobox", "mcw-mainpage-icon"]) 44 | .add_allowed_classes("pre", ["noexcerpt", "navigation-not-searchable"]) 45 | .url_relative(ammonia::UrlRelative::RewriteWithBase(url.clone())) 46 | .clean(&doc_html) 47 | .to_string(); 48 | 49 | Some(html! { 50 | h2 { 51 | a href=(url) { (page_title) } 52 | } 53 | div.infobox-minecraft_wiki-article { 54 | (PreEscaped(doc_html)) 55 | } 56 | }) 57 | } 58 | -------------------------------------------------------------------------------- /src/web/search/images.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | 3 | use crate::{ 4 | config::Config, 5 | engines::{self, EngineImageResult, ImagesResponse}, 6 | web::search::render_engine_list, 7 | }; 8 | 9 | pub fn render_results(response: ImagesResponse) -> PreEscaped { 10 | html! 
{ 11 | div.image-results { 12 | @for image in &response.image_results { 13 | (render_image_result(image, &response.config)) 14 | } 15 | } 16 | } 17 | } 18 | 19 | fn render_image_result( 20 | result: &engines::SearchResult, 21 | config: &Config, 22 | ) -> PreEscaped { 23 | let original_image_src = &result.result.image_url; 24 | let image_src = if config.image_search.proxy.enabled { 25 | // serialize url params 26 | let escaped_param = 27 | url::form_urlencoded::byte_serialize(original_image_src.as_bytes()).collect::(); 28 | format!("/image-proxy?url={escaped_param}") 29 | } else { 30 | original_image_src.to_string() 31 | }; 32 | html! { 33 | div.image-result { 34 | a.image-result-anchor rel="noreferrer" href=(original_image_src) target="_blank" { 35 | div.image-result-img-container { 36 | img loading="lazy" src=(image_src) width=(result.result.width) height=(result.result.height); 37 | } 38 | } 39 | a.image-result-page-anchor href=(result.result.page_url) { 40 | span.image-result-page-url.search-result-url { (result.result.page_url) } 41 | span.image-result-title { (result.result.title) } 42 | } 43 | @if config.image_search.show_engines { 44 | {(render_engine_list(&result.engines.iter().copied().collect::>(), config))} 45 | } 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/engines/search/yep.rs: -------------------------------------------------------------------------------- 1 | use serde::Deserialize; 2 | use url::Url; 3 | 4 | use crate::engines::{EngineResponse, EngineSearchResult, RequestResponse, CLIENT}; 5 | 6 | pub async fn request(query: &str) -> RequestResponse { 7 | CLIENT 8 | .get( 9 | Url::parse_with_params( 10 | "https://api.yep.com/fs/2/search", 11 | &[ 12 | ("client", "web"), 13 | ("gl", "all"), 14 | ("no_correct", "true"), 15 | ("q", query), 16 | ("safeSearch", "off"), 17 | ("type", "web"), 18 | ], 19 | ) 20 | .unwrap(), 21 | ) 22 | .into() 23 | } 24 | 25 | #[derive(Deserialize, Debug)] 
26 | struct YepApiResponse { 27 | pub results: Vec, 28 | } 29 | 30 | #[derive(Deserialize, Debug)] 31 | struct YepApiResponseResult { 32 | pub url: String, 33 | pub title: String, 34 | pub snippet: String, 35 | } 36 | 37 | pub fn parse_response(body: &str) -> eyre::Result { 38 | let (code, response): (String, YepApiResponse) = serde_json::from_str(body)?; 39 | if &code != "Ok" { 40 | return Ok(EngineResponse::new()); 41 | } 42 | 43 | let search_results = response 44 | .results 45 | .into_iter() 46 | .map(|result| { 47 | let description_html = scraper::Html::parse_document(&result.snippet); 48 | let description = description_html.root_element().text().collect(); 49 | EngineSearchResult { 50 | url: result.url, 51 | title: result.title, 52 | description, 53 | } 54 | }) 55 | .collect(); 56 | 57 | Ok(EngineResponse { 58 | search_results, 59 | featured_snippet: None, 60 | answer_html: None, 61 | infobox_html: None, 62 | }) 63 | } 64 | -------------------------------------------------------------------------------- /src/engines/search/marginalia.rs: -------------------------------------------------------------------------------- 1 | use serde::Deserialize; 2 | use tracing::error; 3 | use url::Url; 4 | 5 | use crate::{ 6 | engines::{Engine, EngineResponse, RequestResponse, SearchQuery, CLIENT}, 7 | parse::{parse_html_response_with_opts, ParseOpts}, 8 | }; 9 | 10 | #[derive(Deserialize)] 11 | pub struct MarginaliaConfig { 12 | pub args: MarginaliaArgs, 13 | } 14 | #[derive(Deserialize)] 15 | pub struct MarginaliaArgs { 16 | pub profile: String, 17 | pub js: String, 18 | pub adtech: String, 19 | } 20 | 21 | pub async fn request(query: &SearchQuery) -> RequestResponse { 22 | // if the query is more than 3 words or has any special characters then abort 23 | if query.split_whitespace().count() > 3 24 | || !query.chars().all(|c| c.is_ascii_alphanumeric() || c == ' ') 25 | { 26 | return RequestResponse::None; 27 | } 28 | 29 | let config_toml = 
query.config.engines.get(Engine::Marginalia).extra.clone(); 30 | let config: MarginaliaConfig = match toml::Value::Table(config_toml).try_into() { 31 | Ok(args) => args, 32 | Err(err) => { 33 | error!("Failed to parse Marginalia config: {err}"); 34 | return RequestResponse::None; 35 | } 36 | }; 37 | 38 | CLIENT 39 | .get( 40 | Url::parse_with_params( 41 | "https://old-search.marginalia.nu/search", 42 | &[ 43 | ("query", query.query.as_str()), 44 | ("profile", config.args.profile.as_str()), 45 | ("js", config.args.js.as_str()), 46 | ("adtech", config.args.adtech.as_str()), 47 | ], 48 | ) 49 | .unwrap(), 50 | ) 51 | .into() 52 | } 53 | 54 | pub fn parse_response(body: &str) -> eyre::Result { 55 | parse_html_response_with_opts( 56 | body, 57 | ParseOpts::new() 58 | .result("section.search-result") 59 | .title("h2") 60 | .href("a[href]") 61 | .description("p.description"), 62 | ) 63 | } 64 | -------------------------------------------------------------------------------- /module.nix: -------------------------------------------------------------------------------- 1 | self: { 2 | config, 3 | pkgs, 4 | lib, 5 | ... 6 | }: let 7 | cfg = config.services.metasearch; 8 | port = 9 | if lib.hasAttr "bind" cfg.settings 10 | then lib.toInt (builtins.elemAt (lib.splitString ":" cfg.settings.bind) 1) 11 | else 28019; 12 | 13 | settingArg = 14 | if cfg.settings != {} 15 | then " " + pkgs.writers.writeTOML "metasearch.toml" cfg.settings 16 | else ""; 17 | 18 | loggingArg = 19 | if !cfg.enableLogging 20 | then " > /dev/null" 21 | else ""; 22 | in { 23 | options.services.metasearch = { 24 | enable = lib.mkEnableOption "metasearch"; 25 | openFirewall = lib.mkOption { 26 | type = lib.types.bool; 27 | default = false; 28 | description = '' 29 | Open firewall ports used by metasearch. 30 | ''; 31 | }; 32 | enableLogging = lib.mkOption { 33 | type = lib.types.bool; 34 | default = false; 35 | description = '' 36 | Enable metasearch logging. Does not affect stderr. 
37 | ''; 38 | }; 39 | settings = lib.mkOption { 40 | type = lib.types.attrs; 41 | default = {}; 42 | description = '' 43 | Optional metasearch configuration. If not defined, defaults in `src/config.rs` will be used 44 | ''; 45 | example = { 46 | bind = "0.0.0.0:4444"; 47 | ui.show_version_info = true; 48 | urls = { 49 | replace = { 50 | "www.reddit.com" = "old.reddit.com"; 51 | }; 52 | 53 | weight = { 54 | "quora.com" = 0.1; 55 | }; 56 | }; 57 | }; 58 | }; 59 | }; 60 | 61 | config = lib.mkIf cfg.enable { 62 | systemd.services.metasearch = { 63 | wantedBy = ["multi-user.target"]; 64 | after = ["network.target"]; 65 | description = "a cute metasearch engine"; 66 | serviceConfig = { 67 | ExecStart = "${self.packages.${pkgs.system}.default}/bin/metasearch" + settingArg + loggingArg; 68 | }; 69 | }; 70 | 71 | networking.firewall = lib.mkIf cfg.openFirewall { 72 | allowedTCPPorts = [port]; 73 | }; 74 | }; 75 | } 76 | -------------------------------------------------------------------------------- /src/web/index.rs: -------------------------------------------------------------------------------- 1 | use axum::{http::header, response::IntoResponse, Extension}; 2 | use maud::{html, PreEscaped, DOCTYPE}; 3 | 4 | use crate::{config::Config, web::head_html}; 5 | 6 | const BASE_COMMIT_URL: &str = "https://github.com/mat-1/metasearch2/commit/"; 7 | const VERSION: &str = std::env!("CARGO_PKG_VERSION"); 8 | const COMMIT_HASH: &str = std::env!("GIT_HASH"); 9 | const COMMIT_HASH_SHORT: &str = std::env!("GIT_HASH_SHORT"); 10 | 11 | pub async fn get(Extension(config): Extension) -> impl IntoResponse { 12 | let html = html! 
{ 13 | (PreEscaped("\n")) 14 | (DOCTYPE) 15 | html lang="en" { 16 | {(head_html(None, &config))} 17 | body { 18 | @if config.ui.show_settings_link { 19 | a.settings-link href="/settings" { "Settings" } 20 | } 21 | div.main-container.index-page { 22 | h1 { {(config.ui.site_name)} } 23 | form.search-form action="/search" method="get" { 24 | input type="text" name="q" placeholder="Search" id="search-input" autofocus onfocus="this.select()" autocomplete="off"; 25 | input type="submit" value="Search"; 26 | } 27 | } 28 | @if config.ui.show_version_info { 29 | span.version-info { 30 | @if COMMIT_HASH == "unknown" || COMMIT_HASH_SHORT == "unknown" { 31 | "Version " 32 | (VERSION) 33 | } @else { 34 | "Version " 35 | (VERSION) 36 | " (" 37 | a href=(format!("{BASE_COMMIT_URL}{COMMIT_HASH}")) { (COMMIT_HASH_SHORT) } 38 | ")" 39 | } 40 | } 41 | } 42 | } 43 | } 44 | } 45 | .into_string(); 46 | 47 | ([(header::CONTENT_TYPE, "text/html; charset=utf-8")], html) 48 | } 49 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | env, 3 | path::{Path, PathBuf}, 4 | }; 5 | 6 | use config::Config; 7 | use tracing::error; 8 | 9 | pub mod config; 10 | pub mod engines; 11 | pub mod parse; 12 | pub mod urls; 13 | pub mod web; 14 | 15 | #[tokio::main(flavor = "current_thread")] 16 | async fn main() { 17 | tracing_subscriber::fmt::init(); 18 | 19 | if env::args().any(|arg| arg == "--help" || arg == "-h" || arg == "help" || arg == "h") { 20 | println!("Usage: metasearch [config_path]"); 21 | return; 22 | } 23 | 24 | let config_path = config_path(); 25 | let config = match Config::read_or_create(&config_path) { 26 | Ok(config) => config, 27 | Err(err) => { 28 | error!("Couldn't parse config:\n{err}"); 29 | return; 30 | } 31 | }; 32 | web::run(config).await; 33 | } 34 | 35 | fn config_path() -> PathBuf { 36 | if let Some(config_path) = env::args().nth(1) { 
37 | return PathBuf::from(config_path); 38 | } 39 | 40 | let app_name = env!("CARGO_PKG_NAME"); 41 | 42 | let mut default_config_dir = None; 43 | 44 | // $XDG_CONFIG_HOME/metasearch/config.toml 45 | if let Ok(xdg_config_home) = env::var("XDG_CONFIG_HOME") { 46 | let path = PathBuf::from(xdg_config_home) 47 | .join(app_name) 48 | .join("config.toml"); 49 | if path.is_file() { 50 | return path; 51 | } 52 | if default_config_dir.is_none() { 53 | default_config_dir = Some(path); 54 | } 55 | } 56 | 57 | // $HOME/.config/metasearch/config.toml 58 | if let Ok(home) = env::var("HOME") { 59 | let path = PathBuf::from(home) 60 | .join(".config") 61 | .join(app_name) 62 | .join("config.toml"); 63 | if path.is_file() { 64 | return path; 65 | } 66 | if default_config_dir.is_none() { 67 | default_config_dir = Some(path); 68 | } 69 | } 70 | 71 | // ./config.toml 72 | let path = Path::new("config.toml"); 73 | if path.exists() { 74 | return path.to_path_buf(); 75 | } 76 | default_config_dir.unwrap_or(PathBuf::from("config.toml")) 77 | } 78 | -------------------------------------------------------------------------------- /src/engines/postsearch/stackexchange.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | use scraper::{Html, Selector}; 3 | use url::Url; 4 | 5 | use crate::engines::{answer::regex, Response, CLIENT}; 6 | 7 | pub async fn request(response: &Response) -> Option { 8 | for search_result in response.search_results.iter().take(8) { 9 | if regex!(r"^https:\/\/(stackoverflow\.com|serverfault\.com|superuser\.com|\w{1,}\.stackexchange\.com)\/questions\/\d+") 10 | .is_match(&search_result.result.url) 11 | { 12 | return Some(CLIENT.get(search_result.result.url.as_str())); 13 | } 14 | } 15 | 16 | None 17 | } 18 | 19 | pub fn parse_response(body: &str) -> Option> { 20 | let dom = Html::parse_document(body); 21 | 22 | let title = dom 23 | .select(&Selector::parse("h1").unwrap()) 24 | .next()? 
25 | .text() 26 | .collect::(); 27 | 28 | let base_url = dom 29 | .select(&Selector::parse("link[rel=canonical]").unwrap()) 30 | .next()? 31 | .value() 32 | .attr("href")?; 33 | let url = Url::join( 34 | &Url::parse(base_url).unwrap(), 35 | dom.select(&Selector::parse(".question-hyperlink").unwrap()) 36 | .next()? 37 | .value() 38 | .attr("href")?, 39 | ) 40 | .ok()?; 41 | 42 | let answer_query = Selector::parse("div.answer.accepted-answer").unwrap(); 43 | 44 | let answer = dom.select(&answer_query).next()?; 45 | let answer_id = answer.value().attr("data-answerid")?; 46 | let answer_html = answer 47 | .select(&Selector::parse("div.answercell > div.js-post-body").unwrap()) 48 | .next()? 49 | .html() 50 | .to_string(); 51 | 52 | let answer_html = ammonia::Builder::default() 53 | .url_relative(ammonia::UrlRelative::RewriteWithBase(url.clone())) 54 | .clean(&answer_html) 55 | .to_string(); 56 | 57 | let url = format!("{url}#{answer_id}"); 58 | 59 | Some(html! { 60 | a href=(url) { 61 | h2 { (title) } 62 | } 63 | div.infobox-stackexchange-answer { 64 | (PreEscaped(answer_html)) 65 | } 66 | }) 67 | } 68 | -------------------------------------------------------------------------------- /src/engines/postsearch/docs_rs.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | use scraper::{Html, Selector}; 3 | 4 | use crate::engines::{HttpResponse, Response, CLIENT}; 5 | 6 | pub async fn request(response: &Response) -> Option { 7 | for search_result in response.search_results.iter().take(8) { 8 | if search_result.result.url.starts_with("https://docs.rs/") { 9 | return Some(CLIENT.get(search_result.result.url.as_str())); 10 | } 11 | } 12 | 13 | None 14 | } 15 | 16 | pub fn parse_response(HttpResponse { res, body, .. 
}: &HttpResponse) -> Option> { 17 | let url = res.url().clone(); 18 | 19 | let dom = Html::parse_document(body); 20 | 21 | let version = dom 22 | .select(&Selector::parse("h2 .version").unwrap()) 23 | .next()? 24 | .text() 25 | .collect::(); 26 | 27 | let page_title = dom 28 | .select(&Selector::parse("h1").unwrap()) 29 | .next()? 30 | .text() 31 | .collect::() 32 | .trim() 33 | .to_string(); 34 | 35 | let doc_query = Selector::parse(".docblock").unwrap(); 36 | 37 | let doc_html = dom 38 | .select(&doc_query) 39 | .next() 40 | .map(|doc| doc.inner_html()) 41 | .unwrap_or_default(); 42 | 43 | let item_decl = dom 44 | .select(&Selector::parse(".item-decl").unwrap()) 45 | .next() 46 | .map(|el| el.html()) 47 | .unwrap_or_default(); 48 | 49 | let doc_html = ammonia::Builder::default() 50 | .link_rel(None) 51 | .url_relative(ammonia::UrlRelative::RewriteWithBase(url.clone())) 52 | .clean(&format!("{item_decl}{doc_html}")) 53 | .to_string(); 54 | 55 | let (category, title) = page_title.split_once(' ').unwrap_or(("", &page_title)); 56 | 57 | let title_html = html! { 58 | h2 { 59 | (category) 60 | " " 61 | a href=(url) { (title) } 62 | @if category == "Crate" { 63 | span.infobox-docs_rs-version { (version) } 64 | } 65 | } 66 | }; 67 | 68 | Some(html! 
{ 69 | (title_html) 70 | div.infobox-docs_rs-doc { 71 | (PreEscaped(doc_html)) 72 | } 73 | }) 74 | } 75 | -------------------------------------------------------------------------------- /src/engines/postsearch/mdn.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | use scraper::{Html, Selector}; 3 | use serde::Deserialize; 4 | use tracing::error; 5 | 6 | use crate::engines::{Engine, HttpResponse, Response, CLIENT}; 7 | 8 | #[derive(Deserialize)] 9 | pub struct MdnConfig { 10 | pub max_sections: usize, 11 | } 12 | 13 | pub async fn request(response: &Response) -> Option { 14 | for search_result in response.search_results.iter().take(8) { 15 | if search_result 16 | .result 17 | .url 18 | .starts_with("https://developer.mozilla.org/en-US/docs/Web") 19 | { 20 | return Some(CLIENT.get(search_result.result.url.as_str())); 21 | } 22 | } 23 | 24 | None 25 | } 26 | 27 | pub fn parse_response( 28 | HttpResponse { res, body, config }: &HttpResponse, 29 | ) -> Option> { 30 | let config_toml = config.engines.get(Engine::Mdn).extra.clone(); 31 | let config: MdnConfig = match toml::Value::Table(config_toml).try_into() { 32 | Ok(args) => args, 33 | Err(err) => { 34 | error!("Failed to parse Mdn config: {err}"); 35 | return None; 36 | } 37 | }; 38 | 39 | let url = res.url().clone(); 40 | 41 | let dom = Html::parse_document(body); 42 | 43 | let page_title = dom 44 | .select(&Selector::parse("header > h1").unwrap()) 45 | .next()? 46 | .text() 47 | .collect::() 48 | .trim() 49 | .to_string(); 50 | 51 | let doc_query = Selector::parse(".section-content").unwrap(); 52 | 53 | let max_sections = if config.max_sections == 0 { 54 | usize::MAX 55 | } else { 56 | config.max_sections 57 | }; 58 | 59 | let doc_html = dom 60 | .select(&doc_query) 61 | .map(|doc| doc.inner_html()) 62 | .take(max_sections) 63 | .collect::>() 64 | .join("
"); 65 | 66 | let doc_html = ammonia::Builder::default() 67 | .link_rel(None) 68 | .url_relative(ammonia::UrlRelative::RewriteWithBase(url.clone())) 69 | .clean(&doc_html) 70 | .to_string(); 71 | 72 | Some(html! { 73 | h2 { 74 | a href=(url) { (page_title) } 75 | } 76 | div.infobox-mdn-article { 77 | (PreEscaped(doc_html)) 78 | } 79 | }) 80 | } 81 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "a cute metasearch engine"; 3 | 4 | inputs = { 5 | nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; 6 | 7 | crane.url = "github:ipetkov/crane"; 8 | 9 | flake-parts = { 10 | url = "github:hercules-ci/flake-parts"; 11 | inputs.nixpkgs-lib.follows = "nixpkgs"; 12 | }; 13 | }; 14 | 15 | outputs = inputs @ { 16 | self, 17 | crane, 18 | flake-parts, 19 | ... 20 | }: 21 | flake-parts.lib.mkFlake {inherit inputs;} { 22 | systems = ["x86_64-linux" "x86_64-darwin" "aarch64-darwin" "aarch64-linux"]; 23 | flake.nixosModules.default = import ./module.nix self; 24 | 25 | perSystem = { 26 | pkgs, 27 | system, 28 | ... 
29 | }: let 30 | craneLib = crane.mkLib pkgs; 31 | 32 | assetFilter = path: _type: (pkgs.lib.strings.hasPrefix (toString ./src/web/assets) path); 33 | sourceFilter = path: type: (craneLib.filterCargoSources path type) || (assetFilter path type); 34 | 35 | # Common arguments can be set here to avoid repeating them later 36 | # Note: changes here will rebuild all dependency crates 37 | commonArgs = { 38 | src = pkgs.lib.cleanSourceWith { 39 | src = ./.; 40 | filter = sourceFilter; 41 | name = "source"; # Be reproducible, regardless of the directory name 42 | }; 43 | strictDeps = true; 44 | 45 | buildInputs = [ 46 | # Add additional build inputs here 47 | ]; 48 | }; 49 | 50 | metasearch2 = craneLib.buildPackage (commonArgs 51 | // { 52 | cargoArtifacts = craneLib.buildDepsOnly commonArgs; 53 | 54 | # Additional environment variables or build phases/hooks can be set 55 | # here *without* rebuilding all dependency crates 56 | # MY_CUSTOM_VAR = "some value"; 57 | }); 58 | in { 59 | formatter = pkgs.alejandra; 60 | 61 | checks = { 62 | inherit metasearch2; 63 | }; 64 | 65 | packages.default = metasearch2; 66 | 67 | apps.default = { 68 | type = "app"; 69 | program = "${self.packages.${system}.default}/bin/metasearch"; 70 | }; 71 | 72 | devShells.default = craneLib.devShell { 73 | checks = self.checks.${system}; 74 | 75 | # Additional dev-shell environment variables can be set directly 76 | # MY_CUSTOM_DEVELOPMENT_VAR = "something else"; 77 | 78 | # Extra inputs can be added here; cargo and rustc are provided by default. 79 | packages = [ 80 | # pkgs.ripgrep 81 | ]; 82 | }; 83 | }; 84 | }; 85 | } 86 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | ========== 2 | metasearch 3 | ========== 4 | 5 | https://github.com/mat-1/metasearch2 6 | 7 | ---- 8 | INFO 9 | ---- 10 | 11 | metasearch (aka metasearch2) is a cute metasearch engine. 
It sources its results 12 | from Google, Bing, Brave, and several others. It's designed to be as lightweight 13 | as possible, both on the server and client. There is no required client-side 14 | JavaScript. 15 | 16 | There's a public demo instance at https://s.matdoes.dev, but please do not use 17 | it as your default or rely on it. This is so I don't get ratelimited by Google 18 | or other engines. Also, searches are logged. Run your own instance instead! 19 | 20 | ------------ 21 | INSTALLATION 22 | ------------ 23 | 24 | The easiest way to install metasearch is with `cargo install metasearch`. To get 25 | the unstable version with the latest features, you can install it with 26 | `cargo install --git https://github.com/mat-1/metasearch2`. 27 | 28 | Usage: `metasearch [config_file]` 29 | 30 | The config_file argument is optional; if it's not specified then it'll be 31 | checked at the following locations: 32 | 33 | - $XDG_CONFIG_HOME/metasearch/config.toml 34 | - $HOME/.config/metasearch/config.toml 35 | - ./config.toml 36 | 37 | If no config file exists, it'll be created at the first valid path in the list. 38 | 39 | By default, metasearch runs on port 28019. You are encouraged to use a reverse 40 | proxy. 41 | 42 | ------------- 43 | CONFIGURATION 44 | ------------- 45 | 46 | You can see all the default config options at `src/config.rs`. Some interesting 47 | options you may want to change are: 48 | 49 | - bind - the host and port that the web server runs on, defaults to 50 | `0.0.0.0:28019`. 51 | - api - whether your instance is accessible through a JSON API. See below for 52 | more details. 53 | - ui.stylesheet_url - a link to a stylesheet that will be loaded alongside the 54 | main one, for example `/themes/catppuccin-mocha.css`. 55 | - image_search.enabled - add a tab for viewing image results for your query. 56 | this is disabled by default as the image proxy could be used to make GET 57 | requests to arbitrary URLs from your server. 
58 | - engines.google.weight - the ranking score multiplier for an engine, you can 59 | modify this if you prefer the results from certain engines. 60 | 61 | -------- 62 | JSON API 63 | -------- 64 | 65 | metasearch has a JSON API that can be enabled by setting `api = true` in your 66 | config. To use it, set the `Accept: application/json` header in your requests. 67 | 68 | For example: 69 | curl 'http://localhost:28019/search?q=sandcats' -H 'Accept: application/json' 70 | 71 | The structure of the API is not guaranteed to be stable, as it relies on 72 | serializing internal structs. It may change without warning in the future. 73 | -------------------------------------------------------------------------------- /src/web/image_proxy.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use axum::{ 4 | extract::Query, 5 | http::StatusCode, 6 | response::{IntoResponse, Response}, 7 | Extension, 8 | }; 9 | use tracing::error; 10 | use wreq::header; 11 | 12 | use crate::{config::Config, engines}; 13 | 14 | pub async fn route( 15 | Query(params): Query>, 16 | Extension(config): Extension, 17 | ) -> Response { 18 | let image_search_config = &config.image_search; 19 | let proxy_config = &image_search_config.proxy; 20 | if !image_search_config.enabled || !proxy_config.enabled { 21 | return (StatusCode::FORBIDDEN, "Image proxy is disabled").into_response(); 22 | }; 23 | let url = params.get("url").cloned().unwrap_or_default(); 24 | if url.is_empty() { 25 | return (StatusCode::BAD_REQUEST, "Missing `url` parameter").into_response(); 26 | } 27 | 28 | let mut res = match engines::CLIENT 29 | .get(&url) 30 | .header("accept", "image/*") 31 | .send() 32 | .await 33 | { 34 | Ok(res) => res, 35 | Err(err) => { 36 | error!("Image proxy error for {url}: {err}"); 37 | return (StatusCode::INTERNAL_SERVER_ERROR, "Image proxy error").into_response(); 38 | } 39 | }; 40 | 41 | let max_size = 
proxy_config.max_download_size; 42 | 43 | if res.content_length().unwrap_or_default() > max_size { 44 | return (StatusCode::PAYLOAD_TOO_LARGE, "Image too large").into_response(); 45 | } 46 | 47 | const ALLOWED_IMAGE_TYPES: &[&str] = &["apng", "avif", "gif", "jpeg", "png", "webp"]; 48 | 49 | // validate content-type 50 | let content_type = res 51 | .headers() 52 | .get(wreq::header::CONTENT_TYPE) 53 | .and_then(|v| v.to_str().ok()) 54 | .unwrap_or_default() 55 | .to_string(); 56 | 57 | let Some((base_type, subtype)) = content_type.split_once("/") else { 58 | return (StatusCode::UNSUPPORTED_MEDIA_TYPE, "Invalid Content-Type").into_response(); 59 | }; 60 | if base_type != "image" { 61 | return (StatusCode::UNSUPPORTED_MEDIA_TYPE, "Not an image").into_response(); 62 | } 63 | if !ALLOWED_IMAGE_TYPES.contains(&subtype) { 64 | return (StatusCode::UNSUPPORTED_MEDIA_TYPE, "Image type not allowed").into_response(); 65 | } 66 | 67 | let mut image_bytes = Vec::new(); 68 | while let Ok(Some(chunk)) = res.chunk().await { 69 | image_bytes.extend_from_slice(&chunk); 70 | if image_bytes.len() as u64 > max_size { 71 | return (StatusCode::PAYLOAD_TOO_LARGE, "Image too large").into_response(); 72 | } 73 | } 74 | 75 | ( 76 | [ 77 | (header::CONTENT_TYPE, content_type), 78 | (header::CACHE_CONTROL, "public, max-age=31536000".to_owned()), 79 | (header::X_CONTENT_TYPE_OPTIONS, "nosniff".to_owned()), 80 | (header::CONTENT_DISPOSITION, "attachment".to_owned()), 81 | ], 82 | image_bytes, 83 | ) 84 | .into_response() 85 | } 86 | -------------------------------------------------------------------------------- /src/web/search/all.rs: -------------------------------------------------------------------------------- 1 | //! Rendering results in the "all" tab. 
2 | 3 | use maud::{html, PreEscaped}; 4 | 5 | use crate::{ 6 | config::Config, 7 | engines::{self, EngineSearchResult, Infobox, Response}, 8 | web::search::render_engine_list, 9 | }; 10 | 11 | pub fn render_results(response: Response) -> PreEscaped { 12 | let mut html = String::new(); 13 | if let Some(answer) = &response.answer { 14 | html.push_str( 15 | &html! { 16 | div.answer { 17 | (answer.html) 18 | (render_engine_list(&[answer.engine], &response.config)) 19 | } 20 | } 21 | .into_string(), 22 | ); 23 | } 24 | if let Some(infobox) = &response.infobox { 25 | html.push_str( 26 | &html! { 27 | div.infobox { 28 | (infobox.html) 29 | (render_engine_list(&[infobox.engine], &response.config)) 30 | } 31 | } 32 | .into_string(), 33 | ); 34 | } 35 | if let Some(featured_snippet) = &response.featured_snippet { 36 | html.push_str(&render_featured_snippet(featured_snippet, &response.config).into_string()); 37 | } 38 | for result in &response.search_results { 39 | html.push_str(&render_search_result(result, &response.config).into_string()); 40 | } 41 | 42 | if html.is_empty() { 43 | html.push_str( 44 | &html! { 45 | p { "No results." } 46 | } 47 | .into_string(), 48 | ); 49 | } 50 | 51 | PreEscaped(html) 52 | } 53 | 54 | fn render_search_result( 55 | result: &engines::SearchResult, 56 | config: &Config, 57 | ) -> PreEscaped { 58 | html! { 59 | div.search-result { 60 | a.search-result-anchor rel="noreferrer" href=(result.result.url) { 61 | span.search-result-url { (result.result.url) } 62 | h3.search-result-title { (result.result.title) } 63 | } 64 | p.search-result-description { (result.result.description) } 65 | (render_engine_list(&result.engines.iter().copied().collect::>(), config)) 66 | } 67 | } 68 | } 69 | 70 | fn render_featured_snippet( 71 | featured_snippet: &engines::FeaturedSnippet, 72 | config: &Config, 73 | ) -> PreEscaped { 74 | html! 
{ 75 | div.featured-snippet { 76 | p.search-result-description { (featured_snippet.description) } 77 | a.search-result-anchor rel="noreferrer" href=(featured_snippet.url) { 78 | span.search-result-url { (featured_snippet.url) } 79 | h3.search-result-title { (featured_snippet.title) } 80 | } 81 | (render_engine_list(&[featured_snippet.engine], config)) 82 | } 83 | } 84 | } 85 | 86 | pub fn render_infobox(infobox: &Infobox, config: &Config) -> PreEscaped { 87 | html! { 88 | div.infobox.postsearch-infobox { 89 | (infobox.html) 90 | (render_engine_list(&[infobox.engine], config)) 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/engines/answer/wikipedia.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use maud::html; 4 | use serde::Deserialize; 5 | use url::Url; 6 | 7 | use crate::engines::{EngineResponse, RequestResponse, CLIENT}; 8 | 9 | use super::colorpicker; 10 | 11 | pub async fn request(mut query: &str) -> RequestResponse { 12 | if !colorpicker::MatchedColorModel::new(query).is_empty() { 13 | // "color picker" is a wikipedia article but we only want to show the 14 | // actual color picker answer 15 | return RequestResponse::None; 16 | } 17 | 18 | // adding "wikipedia" to the start or end of your query is common when you 19 | // want to get a wikipedia article 20 | if let Some(stripped_query) = query.strip_suffix(" wikipedia") { 21 | query = stripped_query 22 | } else if let Some(stripped_query) = query.strip_prefix("wikipedia ") { 23 | query = stripped_query 24 | } 25 | 26 | CLIENT 27 | .get( 28 | Url::parse_with_params( 29 | "https://en.wikipedia.org/w/api.php", 30 | &[ 31 | ("format", "json"), 32 | ("action", "query"), 33 | ("prop", "extracts|pageimages"), 34 | ("exintro", ""), 35 | ("explaintext", ""), 36 | ("redirects", "1"), 37 | ("exsentences", "2"), 38 | ("titles", query), 39 | ], 40 | ) 41 | .unwrap(), 42 | ) 43 | 
.into() 44 | } 45 | 46 | #[derive(Debug, Deserialize)] 47 | pub struct WikipediaResponse { 48 | pub batchcomplete: String, 49 | pub query: WikipediaQuery, 50 | } 51 | 52 | #[derive(Debug, Deserialize)] 53 | pub struct WikipediaQuery { 54 | pub pages: HashMap, 55 | } 56 | 57 | #[derive(Debug, Deserialize)] 58 | pub struct WikipediaPage { 59 | pub pageid: u64, 60 | pub ns: u64, 61 | pub title: String, 62 | pub extract: String, 63 | pub thumbnail: Option, 64 | } 65 | 66 | #[derive(Debug, Deserialize)] 67 | pub struct WikipediaThumbnail { 68 | pub source: String, 69 | pub width: u64, 70 | pub height: u64, 71 | } 72 | 73 | pub fn parse_response(body: &str) -> eyre::Result { 74 | let Ok(res) = serde_json::from_str::(body) else { 75 | return Ok(EngineResponse::new()); 76 | }; 77 | 78 | let pages: Vec<(String, WikipediaPage)> = res.query.pages.into_iter().collect(); 79 | 80 | if pages.is_empty() || pages[0].0 == "-1" { 81 | return Ok(EngineResponse::new()); 82 | } 83 | 84 | let page = &pages[0].1; 85 | let WikipediaPage { 86 | pageid: _, 87 | ns: _, 88 | title, 89 | extract, 90 | thumbnail: _, 91 | } = page; 92 | if extract.ends_with(':') { 93 | return Ok(EngineResponse::new()); 94 | } 95 | 96 | let mut previous_extract = String::new(); 97 | let mut extract = extract.clone(); 98 | while previous_extract != extract { 99 | previous_extract.clone_from(&extract); 100 | extract = extract 101 | .replace("( ", "(") 102 | .replace("(, ", "(") 103 | .replace("(; ", "(") 104 | .replace(" ()", "") 105 | .replace("()", ""); 106 | } 107 | 108 | let page_title = title.replace(' ', "_"); 109 | let page_url = format!("https://en.wikipedia.org/wiki/{page_title}"); 110 | 111 | Ok(EngineResponse::infobox_html(html! 
{ 112 | a href=(page_url) { 113 | h2 { (title) } 114 | } 115 | p { (extract) } 116 | })) 117 | } 118 | -------------------------------------------------------------------------------- /src/web/settings.rs: -------------------------------------------------------------------------------- 1 | use axum::{ 2 | http::{header, HeaderMap, StatusCode}, 3 | response::{IntoResponse, Response}, 4 | Extension, Form, 5 | }; 6 | use axum_extra::extract::{cookie::Cookie, CookieJar}; 7 | use maud::{html, Markup, PreEscaped, DOCTYPE}; 8 | use serde::{Deserialize, Serialize}; 9 | 10 | use crate::{config::Config, web::head_html}; 11 | 12 | pub async fn get(Extension(config): Extension) -> impl IntoResponse { 13 | let theme_option = |value: &str, name: &str| -> Markup { 14 | let selected = config.ui.stylesheet_url == value; 15 | html! { 16 | option value=(value) selected[selected] { 17 | { (name) } 18 | } 19 | } 20 | }; 21 | 22 | let html = html! { 23 | (PreEscaped("\n")) 24 | (DOCTYPE) 25 | html lang="en" { 26 | {(head_html(Some("settings"), &config))} 27 | body { 28 | div.main-container.settings-page { 29 | main { 30 | a.back-to-index-button href="/" { "Back" } 31 | h1 { "Settings" } 32 | form.settings-form method="post" { 33 | label for="theme" { "Theme" } 34 | select name="stylesheet-url" selected=(config.ui.stylesheet_url) { 35 | { (theme_option("", "Ayu Dark")) } 36 | { (theme_option("/themes/catppuccin-mocha.css", "Catppuccin Mocha")) } 37 | { (theme_option("/themes/catppuccin-macchiato.css", "Catppuccin Macchiato")) } 38 | { (theme_option("/themes/catppuccin-latte.css", "Catppuccin Latte")) } 39 | { (theme_option("/themes/nord-bluish.css", "Nord Bluish")) } 40 | { (theme_option("/themes/discord.css", "Discord")) } 41 | } 42 | 43 | br; 44 | 45 | // custom css textarea 46 | details #custom-css-details { 47 | summary { "Custom CSS" } 48 | textarea #custom-css name="stylesheet-str" { 49 | { (config.ui.stylesheet_str) } 50 | } 51 | } 52 | 53 | input #save-settings-button 
type="submit" value="Save"; 54 | } 55 | } 56 | } 57 | } 58 | } 59 | } 60 | .into_string(); 61 | 62 | ([(header::CONTENT_TYPE, "text/html; charset=utf-8")], html) 63 | } 64 | 65 | #[derive(Serialize, Deserialize)] 66 | #[serde(rename_all = "kebab-case")] 67 | pub struct Settings { 68 | pub stylesheet_url: String, 69 | pub stylesheet_str: String, 70 | } 71 | 72 | pub async fn post( 73 | headers: HeaderMap, 74 | mut jar: CookieJar, 75 | Form(settings): Form, 76 | ) -> Response { 77 | let Some(origin) = headers.get("origin").and_then(|h| h.to_str().ok()) else { 78 | return (StatusCode::BAD_REQUEST, "Missing or invalid Origin header").into_response(); 79 | }; 80 | let Some(host) = headers.get("host").and_then(|h| h.to_str().ok()) else { 81 | return (StatusCode::BAD_REQUEST, "Missing or invalid Host header").into_response(); 82 | }; 83 | if origin != format!("http://{host}") && origin != format!("https://{host}") { 84 | return (StatusCode::BAD_REQUEST, "Origin does not match Host").into_response(); 85 | } 86 | 87 | let mut settings_cookie = Cookie::new("settings", serde_json::to_string(&settings).unwrap()); 88 | settings_cookie.make_permanent(); 89 | jar = jar.add(settings_cookie); 90 | 91 | (StatusCode::FOUND, [(header::LOCATION, "/settings")], jar).into_response() 92 | } 93 | -------------------------------------------------------------------------------- /src/engines/postsearch/github.rs: -------------------------------------------------------------------------------- 1 | use maud::{html, PreEscaped}; 2 | use scraper::{Html, Selector}; 3 | use url::Url; 4 | 5 | use crate::engines::{answer::regex, Response, CLIENT}; 6 | 7 | pub async fn request(response: &Response) -> Option { 8 | for search_result in response.search_results.iter().take(8) { 9 | if regex!(r"^https:\/\/github\.com\/[\w-]+\/[\w.-]+$").is_match(&search_result.result.url) { 10 | return Some(CLIENT.get(search_result.result.url.as_str())); 11 | } 12 | } 13 | 14 | None 15 | } 16 | 17 | pub fn 
parse_response(body: &str) -> Option> { 18 | let dom = Html::parse_document(body); 19 | 20 | let url_relative = dom 21 | .select( 22 | &Selector::parse("main #repository-container-header strong[itemprop='name'] > a") 23 | .unwrap(), 24 | ) 25 | .next()? 26 | .value() 27 | .attr("href")?; 28 | let url = format!("https://github.com{url_relative}"); 29 | 30 | let embedded_data_script = dom 31 | .select(&Selector::parse("script[data-target='react-partial.embeddedData']").unwrap()) 32 | .next_back()? 33 | .inner_html(); 34 | let embedded_data = serde_json::from_str::(&embedded_data_script).ok()?; 35 | let readme_html = embedded_data 36 | .get("props")? 37 | .get("initialPayload")? 38 | .get("overview")? 39 | .get("overviewFiles")? 40 | .as_array()? 41 | .first()? 42 | .get("richText")? 43 | .as_str()?; 44 | 45 | let mut readme_html = ammonia::Builder::default() 46 | .link_rel(None) 47 | .add_allowed_classes("div", &["markdown-alert"]) 48 | .add_allowed_classes("p", &["markdown-alert-title"]) 49 | .url_relative(ammonia::UrlRelative::RewriteWithBase( 50 | Url::parse("https://github.com").unwrap(), 51 | )) 52 | .clean(readme_html) 53 | .to_string(); 54 | 55 | let readme_dom = Html::parse_fragment(&readme_html); 56 | let mut readme_element = readme_dom.root_element(); 57 | 58 | let mut is_readme_element_pre = false; 59 | 60 | while readme_element.children().count() == 1 { 61 | // if the readme is wrapped in
, remove that 62 | if let Some(article) = readme_element 63 | .select(&Selector::parse("article").unwrap()) 64 | .next() 65 | { 66 | readme_element = article; 67 | } 68 | // useless div 69 | else if let Some(div) = readme_element 70 | .select(&Selector::parse("div").unwrap()) 71 | .next() 72 | { 73 | readme_element = div; 74 | // useless pre 75 | } else if let Some(pre) = readme_element 76 | .select(&Selector::parse("pre").unwrap()) 77 | .next() 78 | { 79 | readme_element = pre; 80 | is_readme_element_pre = true; 81 | } else { 82 | break; 83 | } 84 | } 85 | 86 | readme_html = readme_element.inner_html().to_string(); 87 | 88 | let title = if let Some(title_el) = readme_dom 89 | // github wraps their h1s in a
90 | .select(&Selector::parse("div:has(h1)").unwrap()) 91 | .next() 92 | { 93 | // if the readme starts with an h1, remove it 94 | let title_html = title_el.html().trim().to_string(); 95 | 96 | if readme_html.starts_with(&title_html) { 97 | readme_html = readme_html[title_html.len()..].to_string(); 98 | } 99 | title_el.text().collect::() 100 | } else { 101 | dom.select( 102 | &Selector::parse("main #repository-container-header strong[itemprop='name'] > a") 103 | .unwrap(), 104 | ) 105 | .next()? 106 | .text() 107 | .collect::() 108 | }; 109 | 110 | Some(html! { 111 | a href=(url) { 112 | h1 { (title) } 113 | } 114 | @if is_readme_element_pre { 115 | pre.infobox-github-readme { 116 | (PreEscaped(readme_html)) 117 | } 118 | } @else { 119 | div.infobox-github-readme { 120 | (PreEscaped(readme_html)) 121 | } 122 | } 123 | }) 124 | } 125 | -------------------------------------------------------------------------------- /src/web/mod.rs: -------------------------------------------------------------------------------- 1 | mod autocomplete; 2 | mod image_proxy; 3 | mod index; 4 | mod opensearch; 5 | mod search; 6 | mod settings; 7 | 8 | use std::{convert::Infallible, net::SocketAddr, sync::Arc}; 9 | 10 | use axum::{ 11 | extract::{Request, State}, 12 | http::{header, StatusCode}, 13 | middleware::{self, Next}, 14 | response::Response, 15 | routing::{get, post, MethodRouter}, 16 | Router, 17 | }; 18 | use axum_extra::extract::CookieJar; 19 | use maud::{html, Markup, PreEscaped}; 20 | use tracing::info; 21 | 22 | use crate::config::Config; 23 | 24 | macro_rules! 
register_static_routes { 25 | ( $app:ident, $( $x:expr ),* ) => { 26 | { 27 | $( 28 | let $app = $app.route( 29 | concat!("/", $x), 30 | static_route( 31 | include_str!(concat!("assets/", $x)), 32 | guess_mime_type($x) 33 | ), 34 | ); 35 | )* 36 | 37 | $app 38 | } 39 | }; 40 | } 41 | 42 | pub async fn run(config: Config) { 43 | let bind_addr = config.bind; 44 | 45 | let config = Arc::new(config); 46 | 47 | fn static_route( 48 | content: &'static str, 49 | content_type: &'static str, 50 | ) -> MethodRouter 51 | where 52 | S: Clone + Send + Sync + 'static, 53 | { 54 | let response = ([(header::CONTENT_TYPE, content_type)], content); 55 | get(|| async { response }) 56 | } 57 | 58 | let app = Router::new() 59 | .route("/", get(index::get)) 60 | .route("/search", get(search::get)) 61 | .route("/settings", get(settings::get)) 62 | .route("/settings", post(settings::post)) 63 | .route("/opensearch.xml", get(opensearch::route)) 64 | .route("/autocomplete", get(autocomplete::route)) 65 | .route("/image-proxy", get(image_proxy::route)) 66 | .layer(middleware::from_fn_with_state( 67 | config.clone(), 68 | config_middleware, 69 | )) 70 | .with_state(config); 71 | let app = register_static_routes![ 72 | app, 73 | "style.css", 74 | "script.js", 75 | "robots.txt", 76 | "scripts/colorpicker.js", 77 | "themes/catppuccin-mocha.css", 78 | "themes/catppuccin-macchiato.css", 79 | "themes/catppuccin-latte.css", 80 | "themes/nord-bluish.css", 81 | "themes/discord.css" 82 | ]; 83 | 84 | info!("Listening on http://{bind_addr}"); 85 | 86 | let listener = tokio::net::TcpListener::bind(bind_addr).await.unwrap(); 87 | axum::serve( 88 | listener, 89 | app.into_make_service_with_connect_info::(), 90 | ) 91 | .await 92 | .unwrap(); 93 | } 94 | 95 | fn guess_mime_type(path: &str) -> &'static str { 96 | match path.rsplit('.').next() { 97 | Some("css") => "text/css; charset=utf-8", 98 | Some("js") => "text/javascript; charset=utf-8", 99 | Some("txt") => "text/plain; charset=utf-8", 100 | _ => 
"text/plain; charset=utf-8", 101 | } 102 | } 103 | 104 | async fn config_middleware( 105 | State(config): State>, 106 | cookies: CookieJar, 107 | mut req: Request, 108 | next: Next, 109 | ) -> Result { 110 | let mut config = config.clone().as_ref().clone(); 111 | 112 | let settings_cookie = cookies.get("settings"); 113 | if let Some(settings_cookie) = settings_cookie { 114 | if let Ok(settings) = serde_json::from_str::(settings_cookie.value()) { 115 | config.ui.stylesheet_url = settings.stylesheet_url; 116 | config.ui.stylesheet_str = settings.stylesheet_str; 117 | } 118 | } 119 | 120 | // modify the state 121 | req.extensions_mut().insert(config); 122 | 123 | Ok(next.run(req).await) 124 | } 125 | 126 | pub fn head_html(title: Option<&str>, config: &Config) -> Markup { 127 | html! { 128 | head { 129 | meta charset="UTF-8"; 130 | meta name="viewport" content="width=device-width, initial-scale=1.0"; 131 | title { 132 | @if let Some(title) = title { 133 | { (title) } 134 | { " - " } 135 | } 136 | {(config.ui.site_name)} 137 | } 138 | link rel="stylesheet" href="/style.css"; 139 | @if !config.ui.stylesheet_url.is_empty() { 140 | link rel="stylesheet" href=(config.ui.stylesheet_url); 141 | } 142 | @if !config.ui.stylesheet_str.is_empty() { 143 | style { (PreEscaped(html_escape::encode_style(&config.ui.stylesheet_str))) } 144 | } 145 | @if !config.ui.favicon_url.is_empty() { 146 | link rel="icon" href=(config.ui.favicon_url); 147 | } 148 | script src="/script.js" defer {} 149 | link rel="search" type="application/opensearchdescription+xml" title="metasearch" href="/opensearch.xml"; 150 | } 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/engines/answer/dictionary.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use eyre::eyre; 4 | use maud::{html, PreEscaped}; 5 | use serde::Deserialize; 6 | use url::Url; 7 | 8 | use 
crate::engines::{EngineResponse, HttpResponse, RequestResponse, CLIENT}; 9 | 10 | use super::regex; 11 | 12 | pub async fn request(query: &str) -> RequestResponse { 13 | // if the query starts with "define " then use that, otherwise abort 14 | let re = regex!(r"^define\s+(\w+)$"); 15 | let query = match re.captures(query) { 16 | Some(caps) => caps.get(1).unwrap().as_str(), 17 | None => return RequestResponse::None, 18 | } 19 | .to_lowercase(); 20 | 21 | CLIENT 22 | .get( 23 | Url::parse( 24 | format!( 25 | "https://en.wiktionary.org/api/rest_v1/page/definition/{}", 26 | urlencoding::encode(&query) 27 | ) 28 | .as_str(), 29 | ) 30 | .unwrap(), 31 | ) 32 | .into() 33 | } 34 | 35 | #[derive(Debug, Deserialize)] 36 | pub struct WiktionaryResponse(pub HashMap>); 37 | 38 | #[derive(Debug, Deserialize)] 39 | #[serde(rename_all = "camelCase")] 40 | pub struct WiktionaryEntry { 41 | pub part_of_speech: String, 42 | pub language: String, 43 | pub definitions: Vec, 44 | } 45 | 46 | #[derive(Debug, Deserialize)] 47 | #[serde(rename_all = "camelCase")] 48 | pub struct WiktionaryDefinition { 49 | pub definition: String, 50 | #[serde(default)] 51 | pub examples: Vec, 52 | } 53 | 54 | pub fn parse_response( 55 | HttpResponse { res, body, .. }: &HttpResponse, 56 | ) -> eyre::Result { 57 | let url = res.url(); 58 | 59 | let Ok(res) = serde_json::from_str::(body) else { 60 | return Ok(EngineResponse::new()); 61 | }; 62 | 63 | let mediawiki_key = url 64 | .path_segments() 65 | .ok_or_else(|| eyre!("url has no path segments"))? 
66 | .next_back() 67 | .ok_or_else(|| eyre!("url has no last path segment"))?; 68 | 69 | let word = key_to_title(mediawiki_key); 70 | 71 | let Some(entries) = res.0.get("en") else { 72 | return Ok(EngineResponse::new()); 73 | }; 74 | 75 | let mut cleaner = ammonia::Builder::default(); 76 | cleaner 77 | .link_rel(None) 78 | .url_relative(ammonia::UrlRelative::RewriteWithBase( 79 | Url::parse("https://en.wiktionary.org").unwrap(), 80 | )); 81 | 82 | let mut html = String::new(); 83 | 84 | html.push_str( 85 | &html! { 86 | h2.answer-dictionary-word { 87 | a href={ "https://en.wiktionary.org/wiki/" (mediawiki_key) } { 88 | (word) 89 | } 90 | } 91 | } 92 | .into_string(), 93 | ); 94 | 95 | for entry in entries { 96 | html.push_str( 97 | &html! { 98 | span.answer-dictionary-part-of-speech { 99 | (entry.part_of_speech.to_lowercase()) 100 | } 101 | } 102 | .into_string(), 103 | ); 104 | 105 | html.push_str("
    "); 106 | let mut previous_definitions = Vec::::new(); 107 | for definition in &entry.definitions { 108 | if definition.definition.is_empty() { 109 | // wiktionary does this sometimes, for example https://en.wiktionary.org/api/rest_v1/page/definition/variance 110 | continue; 111 | } 112 | if previous_definitions 113 | .iter() 114 | .any(|d| d.contains(&definition.definition)) 115 | { 116 | // wiktionary will sometimes duplicate definitions, for example https://en.wiktionary.org/api/rest_v1/page/definition/google 117 | continue; 118 | } 119 | previous_definitions.push(definition.definition.clone()); 120 | 121 | html.push_str("
  1. "); 122 | let definition_html = cleaner 123 | .clean(&definition.definition.replace('“', "\"")) 124 | .to_string(); 125 | 126 | html.push_str(&html! { p { (PreEscaped(definition_html)) } }.into_string()); 127 | 128 | if !definition.examples.is_empty() { 129 | for example in &definition.examples { 130 | let example_html = cleaner.clean(example).to_string(); 131 | html.push_str( 132 | &html! { 133 | blockquote.answer-dictionary-example { 134 | (PreEscaped(example_html)) 135 | } 136 | } 137 | .into_string(), 138 | ); 139 | } 140 | } 141 | html.push_str("
  2. "); 142 | } 143 | html.push_str("
"); 144 | } 145 | 146 | Ok(EngineResponse::answer_html(PreEscaped(html))) 147 | } 148 | 149 | fn key_to_title(key: &str) -> String { 150 | // https://github.com/wikimedia/mediawiki-title 151 | // In general, the page title is converted to the mediawiki DB key format by 152 | // trimming spaces, replacing whitespace symbols to underscores and applying 153 | // wiki-specific capitalization rules. 154 | 155 | let title = key.trim().replace('_', " "); 156 | let mut c = title.chars(); 157 | match c.next() { 158 | None => String::new(), 159 | Some(f) => f.to_uppercase().chain(c).collect(), 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/engines/macros.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! engines { 3 | ($($engine:ident = $id:expr),* $(,)?) => { 4 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)] 5 | pub enum Engine { 6 | $($engine,)* 7 | } 8 | 9 | impl Engine { 10 | #[must_use] 11 | pub fn all() -> &'static [Engine] { 12 | &[$(Engine::$engine,)*] 13 | } 14 | 15 | #[must_use] 16 | pub fn id(&self) -> &'static str { 17 | match self { 18 | $(Engine::$engine => $id,)* 19 | } 20 | } 21 | } 22 | 23 | impl FromStr for Engine { 24 | type Err = (); 25 | 26 | fn from_str(s: &str) -> Result { 27 | match s { 28 | $($id => Ok(Engine::$engine),)* 29 | _ => Err(()), 30 | } 31 | } 32 | } 33 | }; 34 | } 35 | 36 | #[macro_export] 37 | macro_rules! engine_parse_response { 38 | ($res:ident, $module:ident::$engine_id:ident::None) => { 39 | None 40 | }; 41 | ($res:ident, $module:ident::$engine_id:ident::$parse_response:ident) => { 42 | Some($module::$engine_id::$parse_response($res.into())) 43 | }; 44 | } 45 | 46 | #[macro_export] 47 | macro_rules! engine_requests { 48 | ($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) 
=> { 49 | impl Engine { 50 | pub async fn request(&self, query: &SearchQuery) -> eyre::Result { 51 | #[allow(clippy::useless_conversion)] 52 | match self { 53 | $( 54 | Engine::$engine => $module::$engine_id::$request(query).await.into_request_response_result(), 55 | )* 56 | _ => Ok(RequestResponse::None), 57 | } 58 | } 59 | 60 | #[tracing::instrument(skip(self, res), fields(engine = %self))] 61 | pub fn parse_response(&self, res: &HttpResponse) -> eyre::Result { 62 | #[allow(clippy::useless_conversion)] 63 | match self { 64 | $( 65 | Engine::$engine => $crate::engine_parse_response! { res, $module::$engine_id::$parse_response } 66 | .ok_or_else(|| eyre::eyre!("engine {self:?} can't parse response"))?, 67 | )* 68 | _ => eyre::bail!("engine {self:?} can't parse response"), 69 | } 70 | } 71 | } 72 | }; 73 | } 74 | 75 | #[macro_export] 76 | macro_rules! engine_autocomplete_requests { 77 | ($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) => { 78 | impl Engine { 79 | #[must_use] 80 | pub fn request_autocomplete(&self, query: &str) -> Option { 81 | match self { 82 | $( 83 | Engine::$engine => Some($module::$engine_id::$request(query).into()), 84 | )* 85 | _ => None, 86 | } 87 | } 88 | 89 | pub fn parse_autocomplete_response(&self, body: &str) -> eyre::Result> { 90 | match self { 91 | $( 92 | Engine::$engine => $crate::engine_parse_response! { body, $module::$engine_id::$parse_response } 93 | .ok_or_else(|| eyre::eyre!("engine {self:?} can't parse autocomplete response"))?, 94 | )* 95 | _ => eyre::bail!("engine {self:?} can't parse autocomplete response"), 96 | } 97 | } 98 | } 99 | }; 100 | } 101 | 102 | #[macro_export] 103 | macro_rules! engine_postsearch_requests { 104 | ($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) 
=> { 105 | impl Engine { 106 | #[must_use] 107 | pub async fn postsearch_request(&self, response: &Response) -> Option { 108 | match self { 109 | $( 110 | Engine::$engine => $module::$engine_id::$request(response).await, 111 | )* 112 | _ => None, 113 | } 114 | } 115 | 116 | #[must_use] 117 | pub fn postsearch_parse_response(&self, res: &HttpResponse) -> Option> { 118 | match self { 119 | $( 120 | Engine::$engine => $crate::engine_parse_response! { res, $module::$engine_id::$parse_response }?, 121 | )* 122 | _ => None, 123 | } 124 | } 125 | } 126 | }; 127 | } 128 | 129 | #[macro_export] 130 | macro_rules! engine_image_requests { 131 | ($($engine:ident => $module:ident::$engine_id:ident::$request:ident, $parse_response:ident),* $(,)?) => { 132 | impl Engine { 133 | #[must_use] 134 | pub fn request_images(&self, query: &SearchQuery) -> RequestResponse { 135 | match self { 136 | $( 137 | Engine::$engine => $module::$engine_id::$request(query).into(), 138 | )* 139 | _ => RequestResponse::None, 140 | } 141 | } 142 | 143 | pub fn parse_images_response(&self, res: &HttpResponse) -> eyre::Result { 144 | #[allow(clippy::useless_conversion)] 145 | match self { 146 | $( 147 | Engine::$engine => $crate::engine_parse_response! 
{ res, $module::$engine_id::$parse_response } 148 | .ok_or_else(|| eyre::eyre!("engine {self:?} can't parse images response"))?, 149 | )* 150 | _ => eyre::bail!("engine {self:?} can't parse response"), 151 | } 152 | } 153 | } 154 | }; 155 | } 156 | -------------------------------------------------------------------------------- /src/engines/answer/timezone.rs: -------------------------------------------------------------------------------- 1 | use chrono::{DateTime, TimeZone}; 2 | use chrono_tz::{OffsetComponents, Tz}; 3 | use maud::html; 4 | 5 | use crate::engines::EngineResponse; 6 | 7 | use super::regex; 8 | 9 | pub async fn request(query: &str) -> EngineResponse { 10 | match evaluate(query) { 11 | None => EngineResponse::new(), 12 | Some(TimeResponse::Current { time, timezone }) => EngineResponse::answer_html(html! { 13 | p.answer-query { "Current time in " (timezone_to_string(timezone)) } 14 | h3 { 15 | b { (time.format("%-I:%M %P")) } 16 | span.answer-comment { 17 | " (" (time.format("%B %-d")) ")" 18 | } 19 | } 20 | }), 21 | Some(TimeResponse::Conversion { 22 | source_timezone, 23 | target_timezone, 24 | source_time, 25 | target_time, 26 | source_offset, 27 | target_offset, 28 | }) => { 29 | let delta_minutes = (target_offset - source_offset).num_minutes(); 30 | let delta = if delta_minutes % 60 == 0 { 31 | format!("{:+}", delta_minutes / 60) 32 | } else { 33 | format!("{:+}:{}", delta_minutes / 60, delta_minutes % 60) 34 | }; 35 | 36 | EngineResponse::answer_html(html! 
{ 37 | p.answer-query { 38 | (source_time.format("%-I:%M %P")) 39 | " " 40 | (timezone_to_string(source_timezone)) 41 | " to " 42 | (timezone_to_string(target_timezone)) 43 | } 44 | h3 { 45 | b { (target_time.format("%-I:%M %P")) } 46 | " " 47 | span.answer-comment { 48 | (timezone_to_string(target_timezone)) " (" (delta) ")" 49 | } 50 | } 51 | }) 52 | } 53 | } 54 | } 55 | 56 | #[derive(Debug)] 57 | enum TimeResponse { 58 | Current { 59 | time: DateTime, 60 | timezone: Tz, 61 | }, 62 | Conversion { 63 | source_timezone: Tz, 64 | target_timezone: Tz, 65 | source_time: DateTime, 66 | target_time: DateTime, 67 | source_offset: chrono::Duration, 68 | target_offset: chrono::Duration, 69 | }, 70 | } 71 | 72 | fn evaluate(query: &str) -> Option { 73 | // "4pm utc to cst" 74 | let re = regex!(r"(\d{1,2})(?:(\d{1,2}))?\s*(am|pm|) ([\w/+\-]+) (to|as|in) ([\w/+\-]+)"); 75 | if let Some(captures) = re.captures(query) { 76 | if let Some(hour) = captures.get(1).map(|m| m.as_str().parse::().unwrap()) { 77 | let minute = match captures.get(2) { 78 | Some(m) => m.as_str().parse::().ok()?, 79 | None => 0, 80 | }; 81 | let ampm = captures.get(3).unwrap().as_str(); 82 | let timezone1_name = captures.get(4).unwrap().as_str(); 83 | let timezone2_name = captures.get(6).unwrap().as_str(); 84 | 85 | let source_timezone = parse_timezone(timezone1_name)?; 86 | let target_timezone = parse_timezone(timezone2_name)?; 87 | 88 | let current_date = chrono::Utc::now().date_naive(); 89 | 90 | let source_offset = source_timezone.offset_from_utc_date(¤t_date); 91 | let target_offset = target_timezone.offset_from_utc_date(¤t_date); 92 | 93 | let source_time_naive = current_date.and_hms_opt( 94 | if ampm == "pm" && hour != 12 { 95 | hour + 12 96 | } else if ampm == "am" && hour == 12 { 97 | 0 98 | } else { 99 | hour 100 | }, 101 | minute, 102 | 0, 103 | )?; 104 | let source_time_utc = chrono::Utc 105 | .from_local_datetime(&source_time_naive) 106 | .latest()? 
107 | - (source_offset.base_utc_offset() + source_offset.dst_offset()); 108 | 109 | let source_time = source_time_utc.with_timezone(&source_timezone); 110 | let target_time = source_time_utc.with_timezone(&target_timezone); 111 | 112 | return Some(TimeResponse::Conversion { 113 | source_timezone, 114 | target_timezone, 115 | source_time, 116 | target_time, 117 | source_offset: source_offset.base_utc_offset(), 118 | target_offset: target_offset.base_utc_offset(), 119 | }); 120 | } 121 | } 122 | 123 | // "utc time" 124 | let re = regex!(r"([\w/+\-]+)(?: current)? time$"); 125 | // "time in utc" 126 | let re2 = regex!(r"time (?:in|as) ([\w/+\-]+)$"); 127 | if let Some(timezone_name) = re 128 | .captures(query) 129 | .and_then(|m| m.get(1)) 130 | .or_else(|| re2.captures(query).and_then(|m| m.get(1))) 131 | { 132 | if let Some(timezone) = parse_timezone(timezone_name.as_str()) { 133 | let time = chrono::Utc::now().with_timezone(&timezone); 134 | return Some(TimeResponse::Current { time, timezone }); 135 | } 136 | } 137 | 138 | None 139 | } 140 | 141 | fn parse_timezone(timezone_name: &str) -> Option { 142 | match timezone_name.to_lowercase().as_str() { 143 | "cst" | "cdt" => Some(Tz::CST6CDT), 144 | "est" | "edt" => Some(Tz::EST5EDT), 145 | _ => Tz::from_str_insensitive(timezone_name) 146 | .ok() 147 | .or_else(|| Tz::from_str_insensitive(&format!("etc/{timezone_name}")).ok()), 148 | } 149 | } 150 | 151 | fn timezone_to_string(tz: Tz) -> String { 152 | match tz { 153 | Tz::CST6CDT => "CST".to_string(), 154 | Tz::EST5EDT => "EST".to_string(), 155 | _ => { 156 | let tz_string = tz.name(); 157 | if let Some(tz_string) = tz_string.strip_prefix("Etc/") { 158 | tz_string.to_string() 159 | } else { 160 | tz_string.to_string() 161 | } 162 | } 163 | } 164 | } 165 | 166 | #[cfg(test)] 167 | mod tests { 168 | use super::*; 169 | 170 | #[test] 171 | fn test_evaluate() { 172 | let response = evaluate("9 pm est to CST").unwrap(); 173 | let TimeResponse::Conversion { 174 | 
source_time, 175 | target_time, 176 | .. 177 | } = response 178 | else { 179 | panic!("Expected TimeResponse::Conversion, got {response:?}"); 180 | }; 181 | 182 | // we don't check the exact offsets since it depends on daylight savings, cst 183 | // will always be 1 hour behind est though 184 | 185 | assert_eq!(source_time.format("%-I:%M %P").to_string(), "9:00 pm"); 186 | assert_eq!(target_time.format("%-I:%M %P").to_string(), "8:00 pm"); 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /src/engines/search/bing.rs: -------------------------------------------------------------------------------- 1 | use base64::Engine; 2 | use eyre::eyre; 3 | use rand::Rng; 4 | use scraper::{ElementRef, Html, Selector}; 5 | use tracing::warn; 6 | use url::Url; 7 | 8 | use crate::{ 9 | engines::{EngineImageResult, EngineImagesResponse, EngineResponse, CLIENT}, 10 | parse::{parse_html_response_with_opts, ParseOpts, QueryMethod}, 11 | }; 12 | 13 | pub async fn request(query: &str) -> wreq::RequestBuilder { 14 | let cvid = generate_cvid(); 15 | let url = Url::parse_with_params( 16 | "https://www.bing.com/search", 17 | &[ 18 | ("q", query), 19 | ("pq", query), 20 | ("cvid", &cvid), 21 | ("filters", "rcrse:\"1\""), // filters=rcrse:"1" makes it not try to autocorrect 22 | ("FORM", "PERE"), 23 | ("ghc", "1"), 24 | ("lq", "0"), 25 | ("qs", "n"), 26 | ("sk", ""), 27 | ("sp", "-1"), 28 | ], 29 | ) 30 | .unwrap(); 31 | CLIENT 32 | .get(url) 33 | .header("Cookie", &format!("SRCHHPGUSR=IG={}", cvid)) 34 | } 35 | 36 | fn generate_cvid() -> String { 37 | let mut bytes = [0u8; 16]; 38 | rand::rng().fill(&mut bytes); 39 | bytes.iter().map(|b| format!("{:02X}", b)).collect() 40 | } 41 | 42 | pub fn parse_response(body: &str) -> eyre::Result { 43 | parse_html_response_with_opts( 44 | body, 45 | ParseOpts::new() 46 | .result("#b_results > li.b_algo") 47 | .title(".b_algo h2 > a") 48 | .href(QueryMethod::Manual(Box::new(|el: &ElementRef| { 49 | let 
url = el 50 | .select(&Selector::parse("a[href]").unwrap()) 51 | .next() 52 | .and_then(|n| n.value().attr("href")) 53 | .unwrap_or_default(); 54 | clean_url(url) 55 | }))) 56 | .description(QueryMethod::Manual(Box::new(|el: &ElementRef| { 57 | let mut description = String::new(); 58 | for inner_node in el 59 | .select( 60 | &Selector::parse(".b_caption > p, p.b_algoSlug, .b_caption .ipText") 61 | .unwrap(), 62 | ) 63 | .next() 64 | .map(|n| n.children().collect::>()) 65 | .unwrap_or_default() 66 | { 67 | match inner_node.value() { 68 | scraper::Node::Text(t) => { 69 | description.push_str(&t.text); 70 | } 71 | scraper::Node::Element(inner_el) => { 72 | if !inner_el 73 | .has_class("algoSlug_icon", scraper::CaseSensitivity::CaseSensitive) 74 | { 75 | let element_ref = ElementRef::wrap(inner_node).unwrap(); 76 | description.push_str(&element_ref.text().collect::()); 77 | } 78 | } 79 | _ => {} 80 | } 81 | } 82 | 83 | Ok(description) 84 | }))), 85 | ) 86 | } 87 | 88 | pub fn request_images(query: &str) -> wreq::RequestBuilder { 89 | CLIENT.get( 90 | Url::parse_with_params( 91 | "https://www.bing.com/images/async", 92 | &[ 93 | ("q", query), 94 | ("async", "content"), 95 | ("first", "1"), 96 | ("count", "35"), 97 | ], 98 | ) 99 | .unwrap(), 100 | ) 101 | } 102 | 103 | #[tracing::instrument(skip(body))] 104 | pub fn parse_images_response(body: &str) -> eyre::Result { 105 | let dom = Html::parse_document(body); 106 | 107 | let mut image_results = Vec::new(); 108 | 109 | let image_container_el_sel = Selector::parse(".imgpt").unwrap(); 110 | let image_el_sel = Selector::parse(".iusc").unwrap(); 111 | for image_container_el in dom.select(&image_container_el_sel) { 112 | let image_el = image_container_el 113 | .select(&image_el_sel) 114 | .next() 115 | .ok_or_else(|| eyre!("no image element found"))?; 116 | 117 | // parse the "m" attribute as json 118 | let Some(data) = image_el.value().attr("m") else { 119 | // this is normal, i think 120 | continue; 121 | }; 122 | let data 
= serde_json::from_str::(data)?; 123 | let page_url = data 124 | .get("purl") 125 | .and_then(|v| v.as_str()) 126 | .unwrap_or_default(); 127 | let image_url = data 128 | // short for media url, probably 129 | .get("murl") 130 | .and_then(|v| v.as_str()) 131 | .unwrap_or_default(); 132 | let page_title = data 133 | .get("t") 134 | .and_then(|v| v.as_str()) 135 | .unwrap_or_default() 136 | // bing adds these unicode characters around matches 137 | .replace(['', ''], ""); 138 | 139 | // the text looks like "1200 x 1600 · jpegWikipedia" 140 | // (the last part is incorrectly parsed since the actual text is inside another 141 | // element but this is already good enough for our purposes) 142 | let text = image_container_el.text().collect::(); 143 | let width_height: Vec = text 144 | .split(" · ") 145 | .next() 146 | .unwrap_or_default() 147 | .split(" x ") 148 | .map(|s| s.parse().unwrap_or_default()) 149 | .collect(); 150 | let (width, height) = match width_height.as_slice() { 151 | [width, height] => (*width, *height), 152 | _ => { 153 | warn!("couldn't get width and height from text \"{text}\""); 154 | continue; 155 | } 156 | }; 157 | 158 | image_results.push(EngineImageResult { 159 | page_url: page_url.to_string(), 160 | image_url: image_url.to_string(), 161 | title: page_title.to_string(), 162 | width, 163 | height, 164 | }); 165 | } 166 | 167 | Ok(EngineImagesResponse { image_results }) 168 | } 169 | 170 | fn clean_url(url: &str) -> eyre::Result { 171 | // clean up bing's tracking urls 172 | if url.starts_with("https://www.bing.com/ck/a?") { 173 | // get the u param 174 | let url = Url::parse(url)?; 175 | let u = url 176 | .query_pairs() 177 | .find(|(key, _)| key == "u") 178 | .unwrap_or_default() 179 | .1; 180 | // cut off the "a1" and base64 decode 181 | let u = base64::engine::general_purpose::URL_SAFE_NO_PAD 182 | .decode(&u[2..]) 183 | .unwrap_or_default(); 184 | // convert to utf8 185 | Ok(String::from_utf8_lossy(&u).to_string()) 186 | } else { 187 | 
Ok(url.to_string()) 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /src/parse.rs: -------------------------------------------------------------------------------- 1 | //! Helper functions for parsing search engine responses. 2 | 3 | use crate::{ 4 | engines::{EngineFeaturedSnippet, EngineResponse, EngineSearchResult}, 5 | urls::normalize_url, 6 | }; 7 | 8 | use scraper::{Html, Selector}; 9 | use tracing::trace; 10 | 11 | #[derive(Default)] 12 | pub struct ParseOpts { 13 | result: &'static str, 14 | title: QueryMethod, 15 | href: QueryMethod, 16 | description: QueryMethod, 17 | 18 | featured_snippet: &'static str, 19 | featured_snippet_title: QueryMethod, 20 | featured_snippet_href: QueryMethod, 21 | featured_snippet_description: QueryMethod, 22 | } 23 | 24 | impl ParseOpts { 25 | #[must_use] 26 | pub fn new() -> Self { 27 | Self::default() 28 | } 29 | 30 | #[must_use] 31 | pub fn result(mut self, result: &'static str) -> Self { 32 | self.result = result; 33 | self 34 | } 35 | 36 | #[must_use] 37 | pub fn title(mut self, title: impl Into) -> Self { 38 | self.title = title.into(); 39 | self 40 | } 41 | 42 | #[must_use] 43 | pub fn href(mut self, href: impl Into) -> Self { 44 | self.href = href.into(); 45 | self 46 | } 47 | 48 | #[must_use] 49 | pub fn description(mut self, description: impl Into) -> Self { 50 | self.description = description.into(); 51 | self 52 | } 53 | 54 | #[must_use] 55 | pub fn featured_snippet(mut self, featured_snippet: &'static str) -> Self { 56 | self.featured_snippet = featured_snippet; 57 | self 58 | } 59 | 60 | #[must_use] 61 | pub fn featured_snippet_title( 62 | mut self, 63 | featured_snippet_title: impl Into, 64 | ) -> Self { 65 | self.featured_snippet_title = featured_snippet_title.into(); 66 | self 67 | } 68 | 69 | #[must_use] 70 | pub fn featured_snippet_href(mut self, featured_snippet_href: impl Into) -> Self { 71 | self.featured_snippet_href = featured_snippet_href.into(); 72 
| self 73 | } 74 | 75 | #[must_use] 76 | pub fn featured_snippet_description( 77 | mut self, 78 | featured_snippet_description: impl Into, 79 | ) -> Self { 80 | self.featured_snippet_description = featured_snippet_description.into(); 81 | self 82 | } 83 | } 84 | 85 | type ManualQueryMethod = Box eyre::Result>; 86 | 87 | #[derive(Default)] 88 | pub enum QueryMethod { 89 | #[default] 90 | None, 91 | CssSelector(&'static str), 92 | Manual(ManualQueryMethod), 93 | } 94 | 95 | impl From<&'static str> for QueryMethod { 96 | fn from(s: &'static str) -> Self { 97 | QueryMethod::CssSelector(s) 98 | } 99 | } 100 | 101 | impl QueryMethod { 102 | pub fn call_with_css_selector_override( 103 | &self, 104 | el: &scraper::ElementRef, 105 | with_css_selector: impl Fn(&scraper::ElementRef, &'static str) -> Option, 106 | ) -> eyre::Result { 107 | match self { 108 | QueryMethod::None => Ok(String::new()), 109 | QueryMethod::CssSelector(s) => Ok(with_css_selector(el, s).unwrap_or_default()), 110 | QueryMethod::Manual(f) => f(el), 111 | } 112 | } 113 | 114 | pub fn call(&self, el: &scraper::ElementRef) -> eyre::Result { 115 | self.call_with_css_selector_override(el, |el, s| { 116 | el.select(&Selector::parse(s).unwrap()) 117 | .next() 118 | .map(|n| n.text().collect::()) 119 | }) 120 | } 121 | } 122 | 123 | pub(super) fn parse_html_response_with_opts( 124 | body: &str, 125 | opts: ParseOpts, 126 | ) -> eyre::Result { 127 | let dom = Html::parse_document(body); 128 | 129 | let mut search_results = Vec::new(); 130 | 131 | let ParseOpts { 132 | result: result_item_query, 133 | title: title_query_method, 134 | href: href_query_method, 135 | description: description_query_method, 136 | featured_snippet: featured_snippet_query, 137 | featured_snippet_title: featured_snippet_title_query_method, 138 | featured_snippet_href: featured_snippet_href_query_method, 139 | featured_snippet_description: featured_snippet_description_query_method, 140 | } = opts; 141 | 142 | let result_item_query = 
Selector::parse(result_item_query).unwrap(); 143 | 144 | let results = dom.select(&result_item_query); 145 | 146 | for result in results { 147 | let title = title_query_method.call(&result)?; 148 | let url = href_query_method.call_with_css_selector_override(&result, |el, s| { 149 | el.select(&Selector::parse(s).unwrap()).next().map(|n| { 150 | n.value() 151 | .attr("href") 152 | .map_or_else(|| n.text().collect::(), str::to_string) 153 | }) 154 | })?; 155 | let description = description_query_method.call(&result)?; 156 | trace!("url: {url}, title: {title}, description: {description}"); 157 | trace!("result: {:?}", result.value().classes().collect::>()); 158 | 159 | // this can happen on google if you search "roll d6" 160 | let is_empty = description.is_empty() && title.is_empty(); 161 | if is_empty { 162 | trace!("empty content for {url} ({title}), skipping"); 163 | continue; 164 | } 165 | 166 | // this can happen on google if it gives you a featured snippet 167 | if description.is_empty() { 168 | trace!("empty description for {url} ({title}), skipping"); 169 | continue; 170 | } 171 | 172 | let url = normalize_url(&url); 173 | 174 | search_results.push(EngineSearchResult { 175 | url, 176 | title, 177 | description, 178 | }); 179 | } 180 | 181 | let featured_snippet = if featured_snippet_query.is_empty() { 182 | None 183 | } else if let Some(featured_snippet) = dom 184 | .select(&Selector::parse(featured_snippet_query).unwrap()) 185 | .next() 186 | { 187 | let title = featured_snippet_title_query_method.call(&featured_snippet)?; 188 | let url = featured_snippet_href_query_method.call(&featured_snippet)?; 189 | let url = normalize_url(&url); 190 | let description = featured_snippet_description_query_method.call(&featured_snippet)?; 191 | 192 | // this can happen on google if you search "what's my user agent" 193 | let is_empty = description.is_empty() && title.is_empty(); 194 | if is_empty { 195 | None 196 | } else { 197 | Some(EngineFeaturedSnippet { 198 | url, 199 
| title, 200 | description, 201 | }) 202 | } 203 | } else { 204 | None 205 | }; 206 | 207 | Ok(EngineResponse { 208 | search_results, 209 | featured_snippet, 210 | // these fields are used by instant answers, not normal search engines 211 | answer_html: None, 212 | infobox_html: None, 213 | }) 214 | } 215 | -------------------------------------------------------------------------------- /src/engines/answer/fend.rs: -------------------------------------------------------------------------------- 1 | use fend_core::SpanKind; 2 | use maud::{html, PreEscaped}; 3 | use std::sync::{atomic::AtomicU32, atomic::Ordering, LazyLock}; 4 | 5 | use crate::engines::EngineResponse; 6 | 7 | use super::regex; 8 | 9 | pub async fn request(query: &str) -> EngineResponse { 10 | let query = clean_query(query); 11 | 12 | let Some(result_html) = evaluate_to_html(&query, true) else { 13 | return EngineResponse::new(); 14 | }; 15 | 16 | EngineResponse::answer_html(html! { 17 | p.answer-query { (query) " =" } 18 | h3 { b { (result_html) } } 19 | }) 20 | } 21 | 22 | pub fn request_autocomplete(query: &str) -> Vec { 23 | let mut results = Vec::new(); 24 | 25 | let query = clean_query(query); 26 | 27 | if let Some(result) = evaluate_to_plaintext(&query, false) { 28 | results.push(format!("= {result}")); 29 | } 30 | 31 | results 32 | } 33 | 34 | fn clean_query(query: &str) -> String { 35 | query.strip_suffix('=').unwrap_or(query).trim().to_string() 36 | } 37 | 38 | #[derive(Debug)] 39 | pub struct Span { 40 | pub text: String, 41 | pub kind: SpanKind, 42 | } 43 | 44 | fn evaluate_to_plaintext(query: &str, html: bool) -> Option { 45 | let spans = evaluate_into_spans(query, html); 46 | if spans.is_empty() { 47 | return None; 48 | } 49 | 50 | Some( 51 | spans 52 | .iter() 53 | .map(|span| span.text.clone()) 54 | .collect::(), 55 | ) 56 | } 57 | 58 | fn evaluate_to_html(query: &str, html: bool) -> Option> { 59 | let spans = evaluate_into_spans(query, html); 60 | if spans.is_empty() { 61 | return 
None; 62 | } 63 | 64 | let mut result_html = String::new(); 65 | for span in &spans { 66 | let class = match span.kind { 67 | fend_core::SpanKind::Number 68 | | fend_core::SpanKind::Boolean 69 | | fend_core::SpanKind::Date => "answer-calc-constant", 70 | fend_core::SpanKind::String => "answer-calc-string", 71 | _ => "", 72 | }; 73 | if class.is_empty() { 74 | result_html.push_str(&html! { (span.text) }.into_string()); 75 | } else { 76 | result_html.push_str( 77 | &html! { 78 | span.(class) { 79 | (span.text) 80 | } 81 | } 82 | .into_string(), 83 | ); 84 | } 85 | } 86 | 87 | // if the result was a single hex number then we add the decimal equivalent 88 | // below 89 | if spans.len() == 1 90 | && spans[0].kind == fend_core::SpanKind::Number 91 | && spans[0].text.starts_with("0x") 92 | { 93 | let hex = spans[0].text.trim_start_matches("0x"); 94 | if let Ok(num) = u64::from_str_radix(hex, 16) { 95 | result_html.push_str( 96 | &html! { 97 | span.answer-comment { " = " (num) } 98 | } 99 | .into_string(), 100 | ); 101 | } 102 | } 103 | 104 | Some(PreEscaped(result_html)) 105 | } 106 | 107 | pub static FEND_CTX: LazyLock = LazyLock::new(|| { 108 | let mut context = fend_core::Context::new(); 109 | 110 | // make lowercase f and c work 111 | context.define_custom_unit_v1("f", "f", "°F", &fend_core::CustomUnitAttribute::Alias); 112 | context.define_custom_unit_v1("c", "c", "°C", &fend_core::CustomUnitAttribute::Alias); 113 | 114 | context.define_custom_unit_v1( 115 | "mb", 116 | "mbs", 117 | "megabyte", 118 | &fend_core::CustomUnitAttribute::Alias, 119 | ); 120 | context.define_custom_unit_v1( 121 | "gb", 122 | "gbs", 123 | "gigabyte", 124 | &fend_core::CustomUnitAttribute::Alias, 125 | ); 126 | context.define_custom_unit_v1( 127 | "tb", 128 | "tbs", 129 | "terabyte", 130 | &fend_core::CustomUnitAttribute::Alias, 131 | ); 132 | context.define_custom_unit_v1( 133 | "pb", 134 | "pbs", 135 | "petabyte", 136 | &fend_core::CustomUnitAttribute::Alias, 137 | ); 138 | 139 | // make 
random work 140 | context.set_random_u32_fn(rand::random::); 141 | 142 | fend_core::evaluate("ord=(x: x to codepoint)", &mut context).unwrap(); 143 | fend_core::evaluate("chr=(x: x to character)", &mut context).unwrap(); 144 | 145 | context 146 | }); 147 | 148 | struct Interrupter { 149 | invocations_left: AtomicU32, 150 | } 151 | 152 | impl fend_core::Interrupt for Interrupter { 153 | fn should_interrupt(&self) -> bool { 154 | let v = self.invocations_left.load(Ordering::Relaxed); 155 | 156 | if v == 0 { 157 | return true; 158 | } 159 | 160 | self.invocations_left.store(v - 1, Ordering::Relaxed); 161 | false 162 | } 163 | } 164 | 165 | fn evaluate_into_spans(query: &str, multiline: bool) -> Vec { 166 | // fend incorrectly triggers on these often 167 | { 168 | // at least 3 characters and not one of the short constants 169 | if query.len() < 3 && !matches!(query.to_lowercase().as_str(), "pi" | "e" | "c") { 170 | return vec![]; 171 | } 172 | 173 | // probably a query operator thing or a url, fend evaluates these but it 174 | // shouldn't 175 | if regex!("^[a-z]{2,}:").is_match(query) { 176 | return vec![]; 177 | } 178 | 179 | // if it starts and ends with quotes then the person was just searching in 180 | // quotes and didn't mean to evaluate a string 181 | if query.starts_with('"') 182 | && query.ends_with('"') 183 | && query.chars().filter(|c| *c == '"').count() == 2 184 | { 185 | return vec![]; 186 | } 187 | } 188 | 189 | let mut context = FEND_CTX.clone(); 190 | if multiline { 191 | // this makes it generate slightly nicer outputs for some queries like 2d6 192 | context.set_output_mode_terminal(); 193 | } 194 | 195 | // avoids stackoverflows and queries that take too long 196 | // examples: 197 | // - Y = (\f. (\x. f x x)) (\x. 
f x x); Y(Y) 198 | // - 10**100000000 199 | let interrupt = Interrupter { 200 | invocations_left: AtomicU32::new(1000), 201 | }; 202 | let Ok(result) = fend_core::evaluate_with_interrupt(query, &mut context, &interrupt) else { 203 | return vec![]; 204 | }; 205 | let main_result = result.get_main_result(); 206 | if main_result == query { 207 | return vec![]; 208 | } 209 | 210 | let res = result 211 | .get_main_result_spans() 212 | .filter(|span| !span.string().is_empty()) 213 | .map(|span| Span { 214 | text: span.string().to_string(), 215 | kind: span.kind(), 216 | }) 217 | .collect::>(); 218 | 219 | if let Some(first) = res.first() { 220 | if first.kind == SpanKind::Other && first.text.starts_with("\\") { 221 | // false positive, can happen if you search like "a: b" 222 | return vec![]; 223 | } 224 | } 225 | 226 | res 227 | } 228 | -------------------------------------------------------------------------------- /src/engines/answer/numbat.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashSet, sync::LazyLock}; 2 | 3 | use fend_core::SpanKind; 4 | use maud::{html, PreEscaped}; 5 | use numbat::{ 6 | markup::{FormatType, FormattedString, Markup}, 7 | pretty_print::PrettyPrint, 8 | resolver::CodeSource, 9 | InterpreterResult, InterpreterSettings, Statement, 10 | }; 11 | use tracing::debug; 12 | 13 | use crate::engines::EngineResponse; 14 | 15 | pub async fn request(query: &str) -> EngineResponse { 16 | let query = clean_query(query); 17 | 18 | let Some(NumbatResponse { 19 | query_html, 20 | result_html, 21 | }) = evaluate(&query) 22 | else { 23 | return EngineResponse::new(); 24 | }; 25 | 26 | EngineResponse::answer_html(html! 
/// Heuristic filter that decides whether a search query is plausibly meant
/// for the numbat calculator, so we don't evaluate ordinary searches.
///
/// Accepts the short constants "pi", "e", and "c" unconditionally; otherwise
/// requires at least 3 characters, at least one digit, and rejects queries
/// that are a single double-quoted string (the user was quoting a search
/// term, not asking to evaluate a string literal).
fn is_potential_request(query: &str) -> bool {
    // Short mathematical constants are always allowed through.
    let lowered = query.to_lowercase();
    if lowered == "pi" || lowered == "e" || lowered == "c" {
        return true;
    }

    let long_enough = query.len() >= 3;
    let has_digit = query.chars().any(char::is_numeric);

    // A query wrapped in exactly one pair of quotes is a quoted search.
    let quote_count = query.chars().filter(|&c| c == '"').count();
    let fully_quoted = query.starts_with('"') && query.ends_with('"') && quote_count == 2;

    long_enough && has_digit && !fully_quoted
}
| return None; 109 | } 110 | let res_markup = fix_markup(res_markup); 111 | 112 | Some((statements.into_iter().next_back()?, res_markup)) 113 | } 114 | 115 | fn evaluate_for_autocomplete(query: &str) -> Option { 116 | let (_statements, res_markup) = interpret(query)?; 117 | 118 | Some(res_markup.to_string().trim().to_string()) 119 | } 120 | 121 | pub struct NumbatResponse { 122 | pub query_html: PreEscaped, 123 | pub result_html: PreEscaped, 124 | } 125 | 126 | fn evaluate(query: &str) -> Option { 127 | let (statement, res_markup) = interpret(query)?; 128 | 129 | let statement_markup = fix_markup(statement.pretty_print()); 130 | let query_html = markup_to_html(statement_markup); 131 | let result_html = markup_to_html(res_markup); 132 | 133 | Some(NumbatResponse { 134 | query_html, 135 | result_html, 136 | }) 137 | } 138 | 139 | fn fix_markup(markup: Markup) -> Markup { 140 | let mut reordered_markup: Vec = Vec::new(); 141 | const LEFT_SIDE_UNITS: &[&str] = &["$", "€", "£", "¥"]; 142 | for s in markup.0 { 143 | let FormattedString(_output_type, format_type, content) = s.clone(); 144 | 145 | if format_type == FormatType::Unit && LEFT_SIDE_UNITS.contains(&&*content) { 146 | // remove the last markup if it's whitespace 147 | if let Some(FormattedString(_, FormatType::Whitespace, _)) = reordered_markup.last() { 148 | reordered_markup.pop(); 149 | } 150 | reordered_markup.insert(reordered_markup.len() - 1, s); 151 | } else { 152 | reordered_markup.push(s); 153 | } 154 | } 155 | Markup(reordered_markup) 156 | } 157 | 158 | fn markup_to_html(markup: Markup) -> PreEscaped { 159 | let mut html = String::new(); 160 | for FormattedString(_, format_type, content) in markup.0 { 161 | let class = match format_type { 162 | FormatType::Value => "answer-calc-constant", 163 | FormatType::String => "answer-calc-string", 164 | FormatType::Identifier => "answer-calc-func", 165 | _ => "", 166 | }; 167 | if class.is_empty() { 168 | html.push_str(&html! 
{(content)}.into_string()); 169 | } else { 170 | html.push_str( 171 | &html! { 172 | span.(class) { (content) } 173 | } 174 | .into_string(), 175 | ); 176 | } 177 | } 178 | PreEscaped(html) 179 | } 180 | 181 | pub static NUMBAT_CTX: LazyLock = LazyLock::new(|| { 182 | let mut ctx = numbat::Context::new(numbat::module_importer::BuiltinModuleImporter {}); 183 | let _ = ctx.interpret("use prelude", CodeSource::Internal); 184 | let _ = ctx.interpret("use units::currencies", CodeSource::Internal); 185 | 186 | ctx.load_currency_module_on_demand(true); 187 | 188 | // a few hardcoded aliases 189 | // (the lowercase alias code won't work for these because they have prefixes) 190 | for (alias, canonical) in &[ 191 | ("kb", "kB"), 192 | ("kib", "KiB"), 193 | ("mb", "MB"), 194 | ("mib", "MiB"), 195 | ("gb", "GB"), 196 | ("gib", "GiB"), 197 | ("tb", "TB"), 198 | ("tib", "TiB"), 199 | ("pb", "PB"), 200 | ("pib", "PiB"), 201 | ] { 202 | let _ = ctx.interpret(&format!("let {alias} = {canonical}"), CodeSource::Internal); 203 | } 204 | 205 | // lowercase aliases (so for example usd and USD are the same unit) 206 | 207 | let mut unit_names = HashSet::new(); 208 | for names in ctx.unit_names() { 209 | unit_names.extend(names.iter().map(|name| name.to_owned())); 210 | } 211 | 212 | for name in &unit_names { 213 | // taken_unit_names.insert(alias_name); 214 | let name_lower = name.to_lowercase(); 215 | // add every lowercase aliases for every unit as long as that alias isn't 216 | // already taken 217 | if !unit_names.contains(&name_lower) { 218 | let _ = ctx.interpret(&format!("let {name_lower} = {name}"), CodeSource::Internal); 219 | } 220 | } 221 | 222 | ctx 223 | }); 224 | -------------------------------------------------------------------------------- /src/web/assets/script.js: -------------------------------------------------------------------------------- 1 | const searchInputEl = document.getElementById("search-input"); 2 | 3 | if (searchInputEl) { 4 | // add an element with 
// Replaces the contents of the suggestions dropdown with one clickable row
// per autocomplete option, hiding the dropdown when there is nothing to show.
function renderSuggestions(options) {
  // nothing to suggest — hide the dropdown entirely
  if (!options.length) {
    suggestionsEl.style.visibility = "hidden";
    return;
  }

  suggestionsEl.style.visibility = "visible";
  suggestionsEl.innerHTML = "";

  for (const option of options) {
    const optionEl = document.createElement("div");
    optionEl.textContent = option;
    optionEl.className = "search-input-suggestion";
    suggestionsEl.appendChild(optionEl);

    // mousedown (rather than click) so this fires before the input's blur
    // handler hides the dropdown
    optionEl.addEventListener("mousedown", () => {
      searchInputEl.value = option;
      searchInputEl.focus();
      searchInputEl.form.submit();
    });
  }
}
| focusedSuggestionEl = null; 75 | focusedSuggestionIndex = -1; 76 | } 77 | } 78 | 79 | function focusSelectionIndex(index) { 80 | clearFocusedSuggestion(); 81 | focusedSuggestionIndex = index; 82 | focusedSuggestionEl = suggestionsEl.children[focusedSuggestionIndex]; 83 | focusedSuggestionEl.classList.add("focused"); 84 | searchInputEl.value = focusedSuggestionEl.textContent; 85 | } 86 | 87 | document.addEventListener("keydown", (e) => { 88 | // if any modifier keys are pressed, ignore all this 89 | if (e.ctrlKey || e.metaKey || e.altKey || e.shiftKey) { 90 | return; 91 | } 92 | 93 | // if it's focused then use different keybinds 94 | if (searchInputEl.matches(":focus")) { 95 | if (e.key === "ArrowDown") { 96 | e.preventDefault(); 97 | if (focusedSuggestionIndex === -1) { 98 | focusSelectionIndex(0); 99 | } else if (focusedSuggestionIndex < suggestionsEl.children.length - 1) { 100 | focusSelectionIndex(focusedSuggestionIndex + 1); 101 | } else { 102 | focusSelectionIndex(0); 103 | } 104 | } else if (e.key === "ArrowUp") { 105 | e.preventDefault(); 106 | if (focusedSuggestionIndex === -1) { 107 | focusSelectionIndex(suggestionsEl.children.length - 1); 108 | } else if (focusedSuggestionIndex > 0) { 109 | focusSelectionIndex(focusedSuggestionIndex - 1); 110 | } else { 111 | focusSelectionIndex(suggestionsEl.children.length - 1); 112 | } 113 | } else if (e.key === "Escape") { 114 | clearFocusedSuggestion(); 115 | suggestionsEl.style.visibility = "hidden"; 116 | } 117 | 118 | return; 119 | } 120 | 121 | // if the currently selected element is not the search bar and is contenteditable, don't do anything 122 | const focusedEl = document.querySelector(":focus"); 123 | if ( 124 | focusedEl && 125 | (focusedEl.tagName.toLowerCase() == "input" || 126 | focusedEl.tagName.toLowerCase() == "textarea" || 127 | focusedEl.getAttribute("contenteditable") !== null) 128 | ) 129 | return; 130 | 131 | // if the user starts typing but they don't have focus on the input, focus it 132 | 
133 | // must be a letter or number 134 | if (e.key.match(/^[a-z0-9]$/i)) { 135 | searchInputEl.focus(); 136 | } 137 | // right arrow key focuses it at the end 138 | else if (e.key === "ArrowRight") { 139 | searchInputEl.focus(); 140 | searchInputEl.setSelectionRange( 141 | searchInputEl.value.length, 142 | searchInputEl.value.length 143 | ); 144 | } 145 | // left arrow key focuses it at the beginning 146 | else if (e.key === "ArrowLeft") { 147 | searchInputEl.focus(); 148 | searchInputEl.setSelectionRange(0, 0); 149 | } 150 | // backspace key focuses it at the end 151 | else if (e.key === "Backspace") { 152 | searchInputEl.focus(); 153 | searchInputEl.setSelectionRange( 154 | searchInputEl.value.length, 155 | searchInputEl.value.length 156 | ); 157 | } 158 | }); 159 | 160 | // update the input suggestions on input 161 | searchInputEl.addEventListener("input", () => { 162 | clearFocusedSuggestion(); 163 | updateSuggestions(); 164 | }); 165 | // and when they click suggestions 166 | searchInputEl.addEventListener("click", updateSuggestions); 167 | // on unfocus hide the suggestions 168 | searchInputEl.addEventListener("blur", (e) => { 169 | suggestionsEl.style.visibility = "hidden"; 170 | }); 171 | } 172 | 173 | const customCssEl = document.getElementById("custom-css"); 174 | if (customCssEl) { 175 | // tab to indent 176 | // https://stackoverflow.com/a/6637396 177 | customCssEl.addEventListener("keydown", (e) => { 178 | if (e.key == "Tab") { 179 | e.preventDefault(); 180 | var start = customCssEl.selectionStart; 181 | var end = customCssEl.selectionEnd; 182 | customCssEl.value = 183 | customCssEl.value.substring(0, start) + 184 | "\t" + 185 | customCssEl.value.substring(end); 186 | customCssEl.selectionStart = customCssEl.selectionEnd = start + 1; 187 | } 188 | }); 189 | 190 | // ctrl+enter anywhere on the page to submit 191 | const saveEl = document.getElementById("save-settings-button"); 192 | document.addEventListener("keydown", (e) => { 193 | if (e.key == 
"Enter" && (e.ctrlKey || e.metaKey)) { 194 | e.preventDefault(); 195 | console.log("click"); 196 | saveEl.click(); 197 | } 198 | }); 199 | 200 | // save whether the details are open or not 201 | const customCssDetailsEl = document.getElementById("custom-css-details"); 202 | const customCssDetailsOpen = localStorage.getItem("custom-css-details-open"); 203 | if (customCssDetailsOpen === "true") customCssDetailsEl.open = true; 204 | customCssDetailsEl.addEventListener("toggle", () => { 205 | localStorage.setItem("custom-css-details-open", customCssDetailsEl.open); 206 | }); 207 | } 208 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. 
rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. 
Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /src/engines/answer/thesaurus.rs: -------------------------------------------------------------------------------- 1 | use eyre::eyre; 2 | use maud::{html, PreEscaped}; 3 | use scraper::{Html, Selector}; 4 | use serde::Deserialize; 5 | use tracing::error; 6 | use url::Url; 7 | 8 | use crate::engines::{EngineResponse, RequestResponse, CLIENT}; 9 | 10 | use super::regex; 11 | 12 | pub async fn request(query: &str) -> RequestResponse { 13 | let re = regex!(r"^synonym(?:s?) 
for\s+(\w+)$"); 14 | let re2 = regex!(r"^(\w+)\s+synonym(?:s?)$"); 15 | let Some(query) = re 16 | .captures(query) 17 | .and_then(|m| m.get(1)) 18 | .or_else(|| re2.captures(query).and_then(|m| m.get(1))) 19 | else { 20 | return RequestResponse::None; 21 | }; 22 | let query = query.as_str().to_lowercase(); 23 | 24 | CLIENT 25 | .get( 26 | Url::parse( 27 | format!( 28 | "https://www.thesaurus.com/browse/{}", 29 | urlencoding::encode(&query.to_lowercase()) 30 | ) 31 | .as_str(), 32 | ) 33 | .unwrap(), 34 | ) 35 | .into() 36 | } 37 | 38 | #[derive(Debug, Deserialize)] 39 | pub struct ThesaurusResponse { 40 | /// Example: `silly` 41 | pub word: String, 42 | pub items: Vec, 43 | } 44 | 45 | #[derive(Debug, Deserialize)] 46 | pub struct ThesaurusItem { 47 | /// Example `adjective` 48 | pub part_of_speech: String, 49 | /// Example: `absurd, giddy, foolish` 50 | pub as_in: String, 51 | 52 | pub strongest_matches: Vec, 53 | pub strong_matches: Vec, 54 | pub weak_matches: Vec, 55 | } 56 | 57 | pub fn parse_response(body: &str) -> eyre::Result { 58 | let response = parse_thesaurus_com_response(body)?; 59 | 60 | if response.items.is_empty() { 61 | return Ok(EngineResponse::new()); 62 | } 63 | 64 | let rendered_html = render_thesaurus_html(response); 65 | 66 | Ok(EngineResponse::answer_html(rendered_html)) 67 | } 68 | 69 | fn parse_thesaurus_com_response(body: &str) -> eyre::Result { 70 | let dom = Html::parse_document(body); 71 | 72 | let word = dom 73 | .select(&Selector::parse("h1").unwrap()) 74 | .next() 75 | .ok_or_else(|| eyre!("No title found"))? 
76 | .text() 77 | .collect::(); 78 | 79 | let card_sel = Selector::parse("[data-type='synonym-and-antonym-card']").unwrap(); 80 | let card_els = dom.select(&card_sel); 81 | 82 | let mut items = Vec::::new(); 83 | 84 | for synonym_and_antonym_card_el in card_els { 85 | items.push(parse_thesaurus_com_item(synonym_and_antonym_card_el)?); 86 | } 87 | 88 | Ok(ThesaurusResponse { word, items }) 89 | } 90 | 91 | fn parse_thesaurus_com_item( 92 | synonym_and_antonym_card_el: scraper::ElementRef, 93 | ) -> eyre::Result { 94 | let adjective_as_in_words = synonym_and_antonym_card_el 95 | .select(&Selector::parse("div:first-child > p").unwrap()) 96 | .next() 97 | .ok_or_else(|| eyre!("No adjective as in words found"))? 98 | .text() 99 | .collect::(); 100 | let (part_of_speech, as_in) = adjective_as_in_words 101 | .split_once(" as in ") 102 | .ok_or_else(|| eyre!("No 'as in' found"))?; 103 | let part_of_speech = part_of_speech.trim().to_owned(); 104 | let as_in = as_in.trim().to_owned(); 105 | 106 | let matches_container_el = synonym_and_antonym_card_el 107 | .select(&Selector::parse("div:nth-child(2) > div:nth-child(2)").unwrap()) 108 | .next() 109 | .ok_or_else(|| eyre!("No matches container found"))?; 110 | 111 | let mut strongest_matches = Vec::::new(); 112 | let mut strong_matches = Vec::::new(); 113 | let mut weak_matches = Vec::::new(); 114 | 115 | for match_el in matches_container_el.select(&Selector::parse("div").unwrap()) { 116 | let match_type = match_el 117 | .select(&Selector::parse("p").unwrap()) 118 | .next() 119 | .ok_or_else(|| eyre!("No match type found"))? 
120 | .text() 121 | .collect::(); 122 | let match_type = match_type 123 | .split(' ') 124 | .next() 125 | .ok_or_else(|| eyre!("No match type found"))?; 126 | 127 | let matches = match_el 128 | .select(&Selector::parse("a").unwrap()) 129 | .map(|el| el.text().collect::()) 130 | .collect::>(); 131 | 132 | match match_type { 133 | "Strongest" => { 134 | strongest_matches = matches; 135 | } 136 | "Strong" => { 137 | strong_matches = matches; 138 | } 139 | "Weak" => { 140 | weak_matches = matches; 141 | } 142 | _ => { 143 | error!("Unknown thesaurus match type: {match_type}"); 144 | } 145 | } 146 | } 147 | 148 | Ok(ThesaurusItem { 149 | part_of_speech, 150 | as_in, 151 | strongest_matches, 152 | strong_matches, 153 | weak_matches, 154 | }) 155 | } 156 | 157 | fn render_thesaurus_html( 158 | ThesaurusResponse { word, items }: ThesaurusResponse, 159 | ) -> PreEscaped { 160 | html! { 161 | h2.answer-thesaurus-word { 162 | a href={ "https://www.thesaurus.com/browse/" (word) } { 163 | (word) 164 | } 165 | } 166 | div.answer-thesaurus-items { 167 | @for item in items { 168 | div.answer-thesaurus-item { 169 | (render_thesaurus_item_html(item)) 170 | } 171 | } 172 | } 173 | 174 | } 175 | } 176 | 177 | fn render_thesaurus_item_html( 178 | ThesaurusItem { 179 | part_of_speech, 180 | as_in, 181 | strongest_matches, 182 | strong_matches, 183 | weak_matches, 184 | }: ThesaurusItem, 185 | ) -> PreEscaped { 186 | let mut html = String::new(); 187 | 188 | html.push_str( 189 | &html! { 190 | span.answer-thesaurus-word-description { 191 | span.answer-thesaurus-part-of-speech { (part_of_speech.to_lowercase()) } 192 | ", as in " 193 | span.answer-thesaurus-as-in { (as_in) } 194 | } 195 | } 196 | .into_string(), 197 | ); 198 | 199 | let render_matches = |matches: Vec, strength: &str| { 200 | if matches.is_empty() { 201 | return PreEscaped::default(); 202 | } 203 | 204 | html! 
{ 205 | div.{ "answer-thesaurus-" (strength.to_lowercase().replace(' ', "-")) } { 206 | h3.answer-thesaurus-category-title { 207 | (strength) 208 | " " 209 | (if matches.len() == 1 { "match" } else { "matches" }) 210 | } 211 | ul.answer-thesaurus-list { 212 | @for synonym in matches { 213 | li { 214 | a href={ "https://www.thesaurus.com/browse/" (synonym) } { (synonym) } 215 | } 216 | } 217 | } 218 | } 219 | } 220 | }; 221 | 222 | html.push_str(&render_matches(strongest_matches, "Strongest").into_string()); 223 | html.push_str(&render_matches(strong_matches, "Strong").into_string()); 224 | html.push_str(&render_matches(weak_matches, "Weak").into_string()); 225 | 226 | PreEscaped(html) 227 | } 228 | -------------------------------------------------------------------------------- /src/urls.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | use tracing::{error, warn}; 4 | use url::Url; 5 | 6 | use crate::config::{HostAndPath, UrlsConfig}; 7 | 8 | #[tracing::instrument] 9 | pub fn normalize_url(url: &str) -> String { 10 | let url = url.trim_end_matches('#'); 11 | if url.is_empty() { 12 | warn!("url is empty"); 13 | return String::new(); 14 | } 15 | 16 | let Ok(mut url) = Url::parse(url) else { 17 | error!("failed to parse url"); 18 | return url.to_string(); 19 | }; 20 | 21 | // make sure the scheme is https 22 | if url.scheme() == "http" { 23 | url.set_scheme("https").unwrap(); 24 | } 25 | 26 | // remove fragment 27 | url.set_fragment(None); 28 | 29 | // remove trailing slash 30 | let path = url.path().to_string(); 31 | if let Some(path) = path.strip_suffix('/') { 32 | url.set_path(path); 33 | } 34 | 35 | // remove tracking params 36 | let query_pairs = url.query_pairs().into_owned(); 37 | let mut new_query_pairs = Vec::new(); 38 | const TRACKING_PARAMS: &[&str] = &["ref_src", "_sm_au_"]; 39 | for (key, value) in query_pairs { 40 | if !TRACKING_PARAMS.contains(&key.as_str()) { 41 | 
new_query_pairs.push((key, value)); 42 | } 43 | } 44 | if new_query_pairs.is_empty() { 45 | url.set_query(None); 46 | } else { 47 | url.set_query(Some( 48 | &url::form_urlencoded::Serializer::new(String::new()) 49 | .extend_pairs(new_query_pairs) 50 | .finish(), 51 | )); 52 | } 53 | 54 | // url decode and encode path 55 | let path = url.path().to_string(); 56 | let path = match urlencoding::decode(&path) { 57 | Ok(path) => path, 58 | Err(e) => { 59 | warn!("failed to decode path: {e}"); 60 | Cow::Owned(path) 61 | } 62 | }; 63 | url.set_path(path.as_ref()); 64 | 65 | let url = url.to_string(); 66 | // remove trailing slash 67 | let url = if let Some(url) = url.strip_suffix('/') { 68 | url.to_string() 69 | } else { 70 | url 71 | }; 72 | 73 | url 74 | } 75 | 76 | impl HostAndPath { 77 | pub fn contains(&self, host: &str, path: &str) -> bool { 78 | if self.host.starts_with('.') { 79 | if !host.ends_with(&self.host) { 80 | return false; 81 | } 82 | } else if host != self.host { 83 | return false; 84 | } 85 | 86 | if self.path.ends_with('/') || self.path.is_empty() { 87 | path.starts_with(&self.path) 88 | } else { 89 | path == self.path 90 | } 91 | } 92 | 93 | pub fn replace( 94 | replace_from: &HostAndPath, 95 | replace_with: &HostAndPath, 96 | real_url: &HostAndPath, 97 | ) -> Option<(String, String)> { 98 | let new_host = if replace_from.host.starts_with(".") { 99 | if replace_with.host.starts_with(".") { 100 | if let Some(host_without_suffix) = real_url.host.strip_suffix(&replace_from.host) { 101 | format!("{host_without_suffix}{}", replace_with.host) 102 | } else { 103 | return None; 104 | } 105 | } else if real_url.host.ends_with(&replace_from.host) { 106 | replace_with.host.to_owned() 107 | } else { 108 | return None; 109 | } 110 | } else if real_url.host == replace_from.host { 111 | replace_with.host.clone() 112 | } else { 113 | return None; 114 | }; 115 | 116 | // host matches, now check path 117 | 118 | let new_path = if replace_from.path.ends_with('/') || 
replace_from.path.is_empty() { 119 | if replace_with.path.ends_with('/') || replace_with.path.is_empty() { 120 | if let Some(path_without_prefix) = real_url.path.strip_prefix(&replace_from.path) { 121 | format!("{}{path_without_prefix}", replace_with.path) 122 | } else { 123 | return None; 124 | } 125 | } else if real_url.path.starts_with(&replace_from.path) { 126 | replace_with.path.clone() 127 | } else { 128 | return None; 129 | } 130 | } else if real_url.path == replace_from.path { 131 | replace_with.path.clone() 132 | } else { 133 | return None; 134 | }; 135 | 136 | Some((new_host, new_path)) 137 | } 138 | } 139 | 140 | pub fn apply_url_replacements(url: &str, urls_config: &UrlsConfig) -> String { 141 | let Ok(mut url) = Url::parse(url) else { 142 | error!("failed to parse url"); 143 | return url.to_string(); 144 | }; 145 | 146 | let host = url.host_str().unwrap_or_default().to_owned(); 147 | 148 | let path = url 149 | .path() 150 | .strip_prefix("/") 151 | .unwrap_or(url.path()) 152 | .to_owned(); 153 | let real_url = HostAndPath { host, path }; 154 | for (replace_from, replace_to) in &urls_config.replace { 155 | if let Some((new_host, new_path)) = 156 | HostAndPath::replace(replace_from, replace_to, &real_url) 157 | { 158 | let _ = url.set_host(Some(&new_host)); 159 | url.set_path(&new_path); 160 | break; 161 | } 162 | } 163 | 164 | normalize_url(url.as_ref()) 165 | } 166 | pub fn get_url_weight(url: &str, urls_config: &UrlsConfig) -> f64 { 167 | let Ok(url) = Url::parse(url) else { 168 | error!("failed to parse url"); 169 | return 1.; 170 | }; 171 | 172 | let host = url.host_str().unwrap_or_default().to_owned(); 173 | let path = url.path().strip_prefix("/").unwrap_or_default().to_owned(); 174 | for (check, weight) in &urls_config.weight { 175 | if check.contains(&host, &path) { 176 | return *weight; 177 | } 178 | } 179 | 180 | 1. 
181 | } 182 | 183 | #[cfg(test)] 184 | mod tests { 185 | use crate::config::HostAndPath; 186 | 187 | use super::*; 188 | 189 | fn test_replacement(from: &str, to: &str, url: &str, expected: &str) { 190 | let urls_config = UrlsConfig { 191 | replace: vec![(HostAndPath::new(from), HostAndPath::new(to))], 192 | weight: vec![], 193 | }; 194 | let normalized_url = apply_url_replacements(url, &urls_config); 195 | assert_eq!(normalized_url, expected); 196 | } 197 | 198 | #[test] 199 | fn test_replace_url() { 200 | test_replacement( 201 | "minecraft.fandom.com/wiki/", 202 | "minecraft.wiki/w/", 203 | "https://minecraft.fandom.com/wiki/Java_Edition", 204 | "https://minecraft.wiki/w/Java_Edition", 205 | ); 206 | } 207 | #[test] 208 | fn test_replace_wildcard_host_with_absolute() { 209 | test_replacement( 210 | ".medium.com", 211 | "scribe.rip", 212 | "https://example.medium.com/asdf", 213 | "https://scribe.rip/asdf", 214 | ); 215 | } 216 | #[test] 217 | fn test_replace_wildcard_host_with_wildcard() { 218 | test_replacement( 219 | ".medium.com", 220 | ".scribe.rip", 221 | "https://example.medium.com/asdf", 222 | "https://example.scribe.rip/asdf", 223 | ); 224 | } 225 | #[test] 226 | fn test_non_matching_wildcard() { 227 | test_replacement( 228 | ".medium.com", 229 | ".scribe.rip", 230 | "https://medium.com/asdf", 231 | "https://medium.com/asdf", 232 | ); 233 | } 234 | #[test] 235 | fn test_non_matching_wildcard_to_absolute() { 236 | test_replacement( 237 | ".medium.com", 238 | "scribe.rip", 239 | "https://example.com/asdf", 240 | "https://example.com/asdf", 241 | ); 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /src/engines/ranking.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, sync::Arc}; 2 | 3 | use crate::{ 4 | config::Config, 5 | urls::{apply_url_replacements, get_url_weight}, 6 | }; 7 | 8 | use super::{ 9 | Answer, AutocompleteResult, Engine, 
EngineImageResult, EngineImagesResponse, EngineResponse, 10 | EngineSearchResult, FeaturedSnippet, ImagesResponse, Infobox, Response, SearchResult, 11 | }; 12 | 13 | pub fn merge_engine_responses( 14 | config: Arc, 15 | responses: HashMap, 16 | ) -> Response { 17 | let mut search_results: Vec> = Vec::new(); 18 | let mut featured_snippet: Option = None; 19 | let mut answer: Option = None; 20 | let mut infobox: Option = None; 21 | 22 | for (engine, response) in responses { 23 | let engine_config = config.engines.get(engine); 24 | 25 | for (result_index, mut search_result) in response.search_results.into_iter().enumerate() { 26 | // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a 27 | // score of 0.33, etc. 28 | let base_result_score = 1. / (result_index + 1) as f64; 29 | let result_score = base_result_score * engine_config.weight; 30 | 31 | // apply url config here 32 | search_result.url = apply_url_replacements(&search_result.url, &config.urls); 33 | let url_weight = get_url_weight(&search_result.url, &config.urls); 34 | if url_weight <= 0. { 35 | continue; 36 | } 37 | let result_score = result_score * url_weight; 38 | 39 | if let Some(existing_result) = search_results 40 | .iter_mut() 41 | .find(|r| r.result.url == search_result.url) 42 | { 43 | // if the weight of this engine is higher than every other one then replace the 44 | // title and description 45 | if engine_config.weight 46 | > existing_result 47 | .engines 48 | .iter() 49 | .map(|&other_engine| { 50 | let other_engine_config = config.engines.get(other_engine); 51 | other_engine_config.weight 52 | }) 53 | .max_by(|a, b| a.partial_cmp(b).unwrap()) 54 | .unwrap_or(0.) 
55 | { 56 | existing_result.result.title = search_result.title; 57 | existing_result.result.description = search_result.description; 58 | } 59 | 60 | existing_result.engines.insert(engine); 61 | existing_result.score += result_score; 62 | } else { 63 | search_results.push(SearchResult { 64 | result: search_result, 65 | engines: [engine].iter().copied().collect(), 66 | score: result_score, 67 | }); 68 | } 69 | } 70 | 71 | if let Some(mut engine_featured_snippet) = response.featured_snippet { 72 | // if it has a higher weight than the current featured snippet 73 | let featured_snippet_weight = featured_snippet.as_ref().map_or(0., |s| { 74 | let other_engine_config = config.engines.get(s.engine); 75 | other_engine_config.weight 76 | }); 77 | 78 | // url config applies to featured snippets too 79 | engine_featured_snippet.url = 80 | apply_url_replacements(&engine_featured_snippet.url, &config.urls); 81 | let url_weight = get_url_weight(&engine_featured_snippet.url, &config.urls); 82 | if url_weight <= 0. 
{ 83 | continue; 84 | } 85 | let featured_snippet_weight = featured_snippet_weight * url_weight; 86 | 87 | if engine_config.weight > featured_snippet_weight { 88 | featured_snippet = Some(FeaturedSnippet { 89 | url: engine_featured_snippet.url, 90 | title: engine_featured_snippet.title, 91 | description: engine_featured_snippet.description, 92 | engine, 93 | }); 94 | } 95 | } 96 | 97 | if let Some(engine_answer_html) = response.answer_html { 98 | // if it has a higher weight than the current answer 99 | let answer_weight = answer.as_ref().map_or(0., |s| { 100 | let other_engine_config = config.engines.get(s.engine); 101 | other_engine_config.weight 102 | }); 103 | if engine_config.weight > answer_weight { 104 | answer = Some(Answer { 105 | html: engine_answer_html, 106 | engine, 107 | }); 108 | } 109 | } 110 | 111 | if let Some(engine_infobox_html) = response.infobox_html { 112 | // if it has a higher weight than the current infobox 113 | let infobox_weight = infobox.as_ref().map_or(0., |s| { 114 | let other_engine_config = config.engines.get(s.engine); 115 | other_engine_config.weight 116 | }); 117 | if engine_config.weight > infobox_weight { 118 | infobox = Some(Infobox { 119 | html: engine_infobox_html, 120 | engine, 121 | }); 122 | } 123 | } 124 | } 125 | 126 | search_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); 127 | 128 | Response { 129 | search_results, 130 | featured_snippet, 131 | answer, 132 | infobox, 133 | config, 134 | } 135 | } 136 | 137 | pub fn merge_autocomplete_responses( 138 | config: &Config, 139 | responses: HashMap>, 140 | ) -> Vec { 141 | let mut autocomplete_results: Vec = Vec::new(); 142 | 143 | for (engine, response) in responses { 144 | let engine_config = config.engines.get(engine); 145 | 146 | for (result_index, autocomplete_result) in response.into_iter().enumerate() { 147 | // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a 148 | // score of 0.33, etc. 149 | let base_result_score = 1. 
/ (result_index + 1) as f64; 150 | let result_score = base_result_score * engine_config.weight; 151 | 152 | if let Some(existing_result) = autocomplete_results 153 | .iter_mut() 154 | .find(|r| r.query == autocomplete_result) 155 | { 156 | existing_result.score += result_score; 157 | } else { 158 | autocomplete_results.push(AutocompleteResult { 159 | query: autocomplete_result, 160 | score: result_score, 161 | }); 162 | } 163 | } 164 | } 165 | 166 | autocomplete_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); 167 | 168 | autocomplete_results.into_iter().map(|r| r.query).collect() 169 | } 170 | 171 | pub fn merge_images_responses( 172 | config: Arc, 173 | responses: HashMap, 174 | ) -> ImagesResponse { 175 | let mut image_results: Vec> = Vec::new(); 176 | 177 | for (engine, response) in responses { 178 | let engine_config = config.engines.get(engine); 179 | 180 | for (result_index, image_result) in response.image_results.into_iter().enumerate() { 181 | // position 1 has a score of 1, position 2 has a score of 0.5, position 3 has a 182 | // score of 0.33, etc. 183 | let base_result_score = 1. / (result_index + 1) as f64; 184 | let result_score = base_result_score * engine_config.weight; 185 | 186 | if let Some(existing_result) = image_results 187 | .iter_mut() 188 | .find(|r| r.result.image_url == image_result.image_url) 189 | { 190 | // if the weight of this engine is higher than every other one then replace the 191 | // title and page url 192 | if engine_config.weight 193 | > existing_result 194 | .engines 195 | .iter() 196 | .map(|&other_engine| { 197 | let other_engine_config = config.engines.get(other_engine); 198 | other_engine_config.weight 199 | }) 200 | .max_by(|a, b| a.partial_cmp(b).unwrap()) 201 | .unwrap_or(0.) 
202 | { 203 | existing_result.result.title = image_result.title; 204 | existing_result.result.page_url = image_result.page_url; 205 | } 206 | 207 | existing_result.engines.insert(engine); 208 | existing_result.score += result_score; 209 | } else { 210 | image_results.push(SearchResult { 211 | result: image_result, 212 | engines: [engine].iter().copied().collect(), 213 | score: result_score, 214 | }); 215 | } 216 | } 217 | } 218 | 219 | image_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap()); 220 | 221 | ImagesResponse { 222 | image_results, 223 | config, 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /src/engines/search/google.rs: -------------------------------------------------------------------------------- 1 | use eyre::eyre; 2 | use scraper::{ElementRef, Selector}; 3 | use tracing::warn; 4 | use url::Url; 5 | 6 | use crate::{ 7 | engines::{ 8 | EngineImageResult, EngineImagesResponse, EngineResponse, RequestResponse, SearchQuery, 9 | CLIENT, 10 | }, 11 | parse::{parse_html_response_with_opts, ParseOpts, QueryMethod}, 12 | }; 13 | 14 | pub async fn request(search: &SearchQuery) -> eyre::Result { 15 | let url = Url::parse_with_params( 16 | "https://www.google.com/search", 17 | &[ 18 | ("q", search.query.as_str()), 19 | // nfpr makes it not try to autocorrect 20 | ("nfpr", "1"), 21 | ("filter", "0"), 22 | ("start", "0"), 23 | ], 24 | ) 25 | .unwrap(); 26 | 27 | Ok(CLIENT.get(url).into()) 28 | } 29 | 30 | pub fn parse_response(body: &str) -> eyre::Result { 31 | parse_html_response_with_opts( 32 | body, 33 | ParseOpts::new() 34 | // xpd is weird, some results have it but it's usually used for ads? 
35 | // the :first-child filters out the ads though since for ads the first child is always a 36 | // span 37 | .result("[jscontroller=SC7lYd]") 38 | .title("h3") 39 | .href("a[href]") 40 | .description( 41 | "div[data-sncf='2'], div[data-sncf='1,2'], div[style='-webkit-line-clamp:2']", 42 | ) 43 | .featured_snippet("block-component") 44 | .featured_snippet_description(QueryMethod::Manual(Box::new(|el: &ElementRef| { 45 | let mut description = String::new(); 46 | 47 | // role="heading" 48 | if let Some(heading_el) = el 49 | .select(&Selector::parse("div[role='heading']").unwrap()) 50 | .next() 51 | { 52 | description.push_str(&format!("{}\n\n", heading_el.text().collect::())); 53 | } 54 | 55 | if let Some(description_container_el) = el 56 | .select(&Selector::parse("div[data-attrid='wa:/description'] > span:first-child").unwrap()) 57 | .next() 58 | { 59 | description.push_str(&iter_featured_snippet_children(&description_container_el)); 60 | } 61 | else if let Some(description_list_el) = el 62 | .select(&Selector::parse("ul").unwrap()) 63 | .next() 64 | { 65 | // render as bullet points 66 | for li in description_list_el.select(&Selector::parse("li").unwrap()) { 67 | let text = li.text().collect::(); 68 | description.push_str(&format!("• {text}\n")); 69 | } 70 | } 71 | 72 | Ok(description) 73 | }))) 74 | .featured_snippet_title(".g > div[lang] a h3, div[lang] > div[style='position:relative'] a h3") 75 | .featured_snippet_href(QueryMethod::Manual(Box::new(|el: &ElementRef| { 76 | let url = el 77 | .select(&Selector::parse(".g > div[lang] a:has(h3), div[lang] > div[style='position:relative'] a:has(h3)").unwrap()) 78 | .next() 79 | .and_then(|n| n.value().attr("href")) 80 | .unwrap_or_default(); 81 | clean_url(url) 82 | }))), 83 | ) 84 | } 85 | 86 | // Google autocomplete responses sometimes include clickable links that include 87 | // text that we shouldn't show. 
88 | // We can filter for these by removing any elements matching 89 | // [data-ved]:not([data-send-open-event]) 90 | fn iter_featured_snippet_children(el: &ElementRef) -> String { 91 | let mut description = String::new(); 92 | recursive_iter_featured_snippet_children(&mut description, el); 93 | description 94 | } 95 | fn recursive_iter_featured_snippet_children(description: &mut String, el: &ElementRef) { 96 | for inner_node in el.children() { 97 | match inner_node.value() { 98 | scraper::Node::Text(t) => { 99 | description.push_str(&t.text); 100 | } 101 | scraper::Node::Element(inner_el) => { 102 | if inner_el.attr("data-ved").is_none() 103 | || inner_el.attr("data-send-open-event").is_some() 104 | { 105 | recursive_iter_featured_snippet_children( 106 | description, 107 | &ElementRef::wrap(inner_node).unwrap(), 108 | ); 109 | } 110 | } 111 | _ => {} 112 | } 113 | } 114 | } 115 | 116 | pub fn request_autocomplete(query: &str) -> wreq::RequestBuilder { 117 | CLIENT.get( 118 | Url::parse_with_params( 119 | "https://suggestqueries.google.com/complete/search", 120 | &[ 121 | ("output", "firefox"), 122 | ("client", "firefox"), 123 | ("hl", "US-en"), 124 | ("q", query), 125 | ], 126 | ) 127 | .unwrap(), 128 | ) 129 | } 130 | 131 | pub fn parse_autocomplete_response(body: &str) -> eyre::Result> { 132 | let res = serde_json::from_str::>(body)?; 133 | Ok(res 134 | .into_iter() 135 | .nth(1) 136 | .unwrap_or_default() 137 | .as_array() 138 | .cloned() 139 | .unwrap_or_default() 140 | .into_iter() 141 | .map(|v| v.as_str().unwrap_or_default().to_string()) 142 | .collect()) 143 | } 144 | 145 | pub fn request_images(query: &str) -> wreq::RequestBuilder { 146 | // ok so google also has a json api for images BUT it gives us less results 147 | CLIENT.get( 148 | Url::parse_with_params( 149 | "https://www.google.com/search", 150 | &[("q", query), ("udm", "2"), ("prmd", "ivsnmbtz")], 151 | ) 152 | .unwrap(), 153 | ) 154 | } 155 | 156 | pub fn parse_images_response(body: &str) -> 
eyre::Result { 157 | // we can't just scrape the html because it won't give us the image sources, 158 | // so... we have to scrape their internal json 159 | 160 | // iterate through every script until we find something that matches our regex 161 | let internal_json_regex = 162 | regex::Regex::new(r#"(?:\(function\(\)\{google\.jl=\{.+?)var \w=(\{".+?\});"#)?; 163 | let mut internal_json = None; 164 | let dom = scraper::Html::parse_document(body); 165 | for script in dom.select(&Selector::parse("script").unwrap()) { 166 | let script = script.inner_html(); 167 | if let Some(captures) = internal_json_regex.captures(&script).and_then(|c| c.get(1)) { 168 | internal_json = Some(captures.as_str().to_string()); 169 | break; 170 | } 171 | } 172 | 173 | let internal_json = 174 | internal_json.ok_or_else(|| eyre!("couldn't get internal json for google images"))?; 175 | let internal_json: serde_json::Map = 176 | serde_json::from_str(&internal_json)?; 177 | 178 | let mut image_results = Vec::new(); 179 | for element_json in internal_json.values() { 180 | // the internal json uses arrays instead of maps, which makes it kinda hard to 181 | // use and also probably pretty unstable 182 | 183 | let Some(element_json) = element_json 184 | .as_array() 185 | .and_then(|a| a.get(1)) 186 | .and_then(|v| v.as_array()) 187 | else { 188 | continue; 189 | }; 190 | 191 | let Some((image_url, width, height)) = element_json 192 | .get(3) 193 | .and_then(|v| serde_json::from_value(v.clone()).ok()) 194 | else { 195 | warn!("couldn't get image data from google images json"); 196 | continue; 197 | }; 198 | 199 | // this is probably pretty brittle, hopefully google doesn't break it any time 200 | // soon 201 | let Some(page) = element_json 202 | .get(9) 203 | .and_then(|v| v.as_object()) 204 | .and_then(|o| o.get("2003")) 205 | .and_then(|v| v.as_array()) 206 | else { 207 | warn!("couldn't get page data from google images json"); 208 | continue; 209 | }; 210 | let Some(page_url) = 
page.get(2).and_then(|v| v.as_str()).map(|s| s.to_string()) else { 211 | warn!("couldn't get page url from google images json"); 212 | continue; 213 | }; 214 | let Some(title) = page.get(3).and_then(|v| v.as_str()).map(|s| s.to_string()) else { 215 | warn!("couldn't get page title from google images json"); 216 | continue; 217 | }; 218 | 219 | image_results.push(EngineImageResult { 220 | image_url, 221 | page_url, 222 | title, 223 | width, 224 | height, 225 | }); 226 | } 227 | 228 | Ok(EngineImagesResponse { image_results }) 229 | } 230 | 231 | fn clean_url(url: &str) -> eyre::Result { 232 | if url.starts_with("/url?q=") { 233 | // get the q param 234 | let url = Url::parse(format!("https://www.google.com{url}").as_str())?; 235 | let q = url 236 | .query_pairs() 237 | .find(|(key, _)| key == "q") 238 | .unwrap_or_default() 239 | .1; 240 | Ok(q.to_string()) 241 | } else { 242 | Ok(url.to_string()) 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /src/web/search.rs: -------------------------------------------------------------------------------- 1 | mod all; 2 | mod images; 3 | 4 | use std::{collections::HashMap, net::SocketAddr, str::FromStr}; 5 | 6 | use async_stream::stream; 7 | use axum::{ 8 | body::Body, 9 | extract::{ConnectInfo, Query}, 10 | http::{header, HeaderMap, StatusCode}, 11 | response::IntoResponse, 12 | Extension, Json, 13 | }; 14 | use bytes::Bytes; 15 | use maud::{html, PreEscaped, DOCTYPE}; 16 | 17 | use crate::{ 18 | config::Config, 19 | engines::{ 20 | self, Engine, EngineProgressUpdate, ProgressUpdateData, ResponseForTab, SearchQuery, 21 | SearchTab, 22 | }, 23 | web::head_html, 24 | }; 25 | 26 | fn render_beginning_of_html(search: &SearchQuery) -> String { 27 | let form_html = html! 
{ 28 | form.search-form action="/search" method="get" { 29 | input #search-input type="text" name="q" placeholder="Search" value=(search.query) autofocus onfocus="this.select()" autocomplete="off"; 30 | @if search.tab != SearchTab::default() { 31 | input type="hidden" name="tab" value=(search.tab.to_string()); 32 | } 33 | input type="submit" value="Search"; 34 | } 35 | @if search.config.image_search.enabled { 36 | div.search-tabs { 37 | @if search.tab == SearchTab::All { span.search-tab.selected { "All" } } 38 | @else { a.search-tab href={ "?q=" (search.query) } { "All" } } 39 | @if search.tab == SearchTab::Images { span.search-tab.selected { "Images" } } 40 | @else { a.search-tab href={ "?q=" (search.query) "&tab=images" } { "Images" } } 41 | } 42 | } 43 | }; 44 | 45 | // we don't close the elements here because we do chunked responses 46 | html! { 47 | (DOCTYPE) 48 | html lang="en"; 49 | {(head_html(Some(&search.query), &search.config))} 50 | body; 51 | div.main-container.{"search-" (search.tab.to_string())}; 52 | main; 53 | (form_html) 54 | div.progress-updates; 55 | } 56 | .into_string() 57 | } 58 | 59 | fn render_end_of_html() -> String { 60 | r"
".to_string() 61 | } 62 | 63 | fn render_results_for_tab(response: ResponseForTab) -> PreEscaped { 64 | match response { 65 | ResponseForTab::All(r) => all::render_results(r), 66 | ResponseForTab::Images(r) => images::render_results(r), 67 | } 68 | } 69 | 70 | fn render_engine_progress_update( 71 | engine: Engine, 72 | progress_update: &EngineProgressUpdate, 73 | time_ms: u64, 74 | ) -> String { 75 | let message = match progress_update { 76 | EngineProgressUpdate::Requesting => "requesting".to_string(), 77 | EngineProgressUpdate::Downloading => "downloading".to_string(), 78 | EngineProgressUpdate::Parsing => "parsing".to_string(), 79 | EngineProgressUpdate::Done => html! { span.progress-update-done { "done" } }.into_string(), 80 | EngineProgressUpdate::Error(msg) => { 81 | html! { span.progress-update-error { (msg) } }.into_string() 82 | } 83 | }; 84 | 85 | html! { 86 | span.progress-update-time { 87 | (format!("{time_ms:>4}")) 88 | "ms" 89 | } 90 | " " 91 | (engine) 92 | " " 93 | (PreEscaped(message)) 94 | } 95 | .into_string() 96 | } 97 | 98 | pub fn render_engine_list(engines: &[engines::Engine], config: &Config) -> PreEscaped { 99 | let mut html = String::new(); 100 | for (i, engine) in engines.iter().enumerate() { 101 | if config.ui.show_engine_list_separator && i > 0 { 102 | html.push_str(" · "); 103 | } 104 | let raw_engine_id = &engine.id(); 105 | let engine_id = if config.ui.show_engine_list_separator { 106 | raw_engine_id.replace('_', " ") 107 | } else { 108 | raw_engine_id.to_string() 109 | }; 110 | html.push_str(&html! { span.engine-list-item { (engine_id) } }.into_string()) 111 | } 112 | html! 
{ 113 | div.engine-list { 114 | (PreEscaped(html)) 115 | } 116 | } 117 | } 118 | 119 | pub async fn get( 120 | Query(params): Query>, 121 | Extension(config): Extension, 122 | headers: HeaderMap, 123 | ConnectInfo(addr): ConnectInfo, 124 | ) -> axum::response::Response { 125 | let query = params 126 | .get("q") 127 | .cloned() 128 | .unwrap_or_default() 129 | .trim() 130 | .replace('\n', " "); 131 | if query.is_empty() { 132 | // redirect to index 133 | return ( 134 | StatusCode::FOUND, 135 | [ 136 | (header::LOCATION, "/"), 137 | (header::CONTENT_TYPE, "text/html; charset=utf-8"), 138 | ], 139 | Body::from("No query provided, click here to go back to index"), 140 | ) 141 | .into_response(); 142 | } 143 | 144 | let search_tab = params 145 | .get("tab") 146 | .and_then(|t| SearchTab::from_str(t).ok()) 147 | .unwrap_or_default(); 148 | 149 | let query = SearchQuery { 150 | query, 151 | tab: search_tab, 152 | request_headers: headers 153 | .clone() 154 | .into_iter() 155 | .map(|(k, v)| { 156 | ( 157 | k.map(|k| k.to_string()).unwrap_or_default(), 158 | v.to_str().unwrap_or_default().to_string(), 159 | ) 160 | }) 161 | .collect(), 162 | ip: headers 163 | // this could be exploited under some setups, but the ip is only used for the 164 | // "what is my ip" answer so it doesn't really matter 165 | .get("x-forwarded-for") 166 | .map_or_else( 167 | || addr.ip().to_string(), 168 | |ip| ip.to_str().unwrap_or_default().to_string(), 169 | ), 170 | config: config.clone().into(), 171 | }; 172 | 173 | let trying_to_use_api = 174 | query.request_headers.get("accept") == Some(&"application/json".to_string()); 175 | if trying_to_use_api { 176 | if !config.api { 177 | return (StatusCode::FORBIDDEN, "API access is disabled").into_response(); 178 | } 179 | 180 | let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel(); 181 | let search_future = tokio::spawn(async move { engines::search(&query, progress_tx).await }); 182 | if let Err(e) = search_future.await { 183 | 
return (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(); 184 | } 185 | 186 | let mut results = Vec::new(); 187 | while let Some(progress_update) = progress_rx.recv().await { 188 | if let ProgressUpdateData::Response(r) = progress_update.data { 189 | results.push(r); 190 | } 191 | } 192 | 193 | return Json(results).into_response(); 194 | } 195 | 196 | let s = stream! { 197 | type R = Result; 198 | 199 | // the html is sent in three chunks (technically more if you count progress updates): 200 | // 1) the beginning of the html, including the search bar 201 | // 1.5) the progress updates 202 | // 2) the results 203 | // 3) the post-search infobox (usually not sent) + the end of the html 204 | 205 | let first_part = render_beginning_of_html(&query); 206 | // second part is in the loop 207 | let mut third_part = String::new(); 208 | 209 | yield R::Ok(Bytes::from(first_part)); 210 | 211 | let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel(); 212 | 213 | let search_future = tokio::spawn(async move { engines::search(&query, progress_tx).await }); 214 | 215 | while let Some(progress_update) = progress_rx.recv().await { 216 | match progress_update.data { 217 | ProgressUpdateData::Engine { engine, update } => { 218 | let progress_html = format!( 219 | r#"

{}

"#, 220 | render_engine_progress_update(engine, &update, progress_update.time_ms) 221 | ); 222 | yield R::Ok(Bytes::from(progress_html)); 223 | }, 224 | ProgressUpdateData::Response(results) => { 225 | let mut second_part = String::new(); 226 | 227 | second_part.push_str(""); // close progress-updates 228 | #[allow(clippy::literal_string_with_formatting_args)] 229 | second_part.push_str(""); 230 | second_part.push_str(&render_results_for_tab(results).into_string()); 231 | yield Ok(Bytes::from(second_part)); 232 | }, 233 | ProgressUpdateData::PostSearchInfobox(infobox) => { 234 | third_part.push_str(&all::render_infobox(&infobox, &config).into_string()); 235 | } 236 | } 237 | } 238 | 239 | if let Err(e) = search_future.await? { 240 | let error_html = html! { 241 | h1 { 242 | "Error: " 243 | (e) 244 | } 245 | }.into_string(); 246 | yield R::Ok(Bytes::from(error_html)); 247 | return; 248 | }; 249 | 250 | third_part.push_str(&render_end_of_html()); 251 | 252 | yield Ok(Bytes::from(third_part)); 253 | 254 | }; 255 | 256 | let stream = Body::from_stream(s); 257 | 258 | ( 259 | [ 260 | (header::CONTENT_TYPE, "text/html; charset=utf-8"), 261 | (header::TRANSFER_ENCODING, "chunked"), 262 | ], 263 | stream, 264 | ) 265 | .into_response() 266 | } 267 | -------------------------------------------------------------------------------- /src/web/assets/scripts/colorpicker.js: -------------------------------------------------------------------------------- 1 | // some guy on stackoverflow wrote a bunch of codegolfed color space conversion functions so i 2 | // stole them for this (except the cmyk functions, those were stolen from other places) 3 | 4 | // https://stackoverflow.com/a/54116681 5 | function hsvToHsl(h, s, v) { 6 | const l = v - (v * s) / 2; 7 | const m = Math.min(l, 1 - l); 8 | return [h, m ? (v - l) / m : 0, l]; 9 | } 10 | function hslToHsv(h, s, l) { 11 | let v = s * Math.min(l, 1 - l) + l; 12 | return [h, v ? 
2 - (2 * l) / v : 0, v];
}

// https://stackoverflow.com/a/54024653
// h in [0, 360), s and v in [0, 1]; returns [r, g, b] each in [0, 1]
function hsvToRgb(h, s, v) {
  let f = (n, k = (n + h / 60) % 6) =>
    v - v * s * Math.max(Math.min(k, 4 - k, 1), 0);
  return [f(5), f(3), f(1)];
}
// https://stackoverflow.com/a/54070620
function rgbToHsv(r, g, b) {
  let v = Math.max(r, g, b),
    c = v - Math.min(r, g, b);
  let h =
    c && (v == r ? (g - b) / c : v == g ? 2 + (b - r) / c : 4 + (r - g) / c);
  return [60 * (h < 0 ? h + 6 : h), v && c / v, v];
}
// https://stackoverflow.com/a/54071699
function rgbToHsl(r, g, b) {
  let v = Math.max(r, g, b),
    c = v - Math.min(r, g, b),
    f = 1 - Math.abs(v + v - c - 1);
  let h =
    c && (v == r ? (g - b) / c : v == g ? 2 + (b - r) / c : 4 + (r - g) / c);
  return [60 * (h < 0 ? h + 6 : h), f ? c / f : 0, (v + v - c) / 2];
}

// https://www.codeproject.com/Articles/4488/XCmyk-CMYK-to-RGB-Calculator-with-source-code
// all channels in [0, 1]; pure black is special-cased to avoid dividing 0/0
function rgbToCmyk(r, g, b) {
  const k = 1 - Math.max(r, g, b);
  if (k === 1) return [0, 0, 0, 1];
  const c = (1 - r - k) / (1 - k);
  const m = (1 - g - k) / (1 - k);
  const y = (1 - b - k) / (1 - k);
  return [c, m, y, k];
}
// https://stackoverflow.com/a/37643472
function cmykToRgb(c, m, y, k) {
  const r = (1 - c) * (1 - k);
  const g = (1 - m) * (1 - k);
  const b = (1 - y) * (1 - k);
  return [r, g, b];
}

// used for making it so an input isn't modified if we just typed in it
let activeInput = null;
document.addEventListener("keydown", () => {
  activeInput = document.activeElement;
});
document.addEventListener("focusout", () => {
  activeInput = null;

  // in case they set an input to an invalid value
  updateColorPreview();
});

const colorPickerEl = document.getElementsByClassName("answer-colorpicker")[0];

// dom references: saturation/value canvas, pickers, hue slider, preview swatch
const canvasEl = colorPickerEl.getElementsByClassName(
  "answer-colorpicker-canvas"
)[0];
const canvasHueSvgEl = canvasEl.getElementsByClassName(
  "answer-colorpicker-canvas-hue-svg"
)[0];
const pickerEl = colorPickerEl.getElementsByClassName(
  "answer-colorpicker-picker"
)[0];
const previewEl = colorPickerEl.getElementsByClassName(
  "answer-colorpicker-preview"
)[0];
const sliderEl = colorPickerEl.getElementsByClassName(
  "answer-colorpicker-slider"
)[0];
const huepickerEl = colorPickerEl.getElementsByClassName(
  "answer-colorpicker-huepicker"
)[0];

const hexInputEl = document.getElementById("answer-colorpicker-hex-input");
const rgbInputEl = document.getElementById("answer-colorpicker-rgb-input");
const cmykInputEl = document.getElementById("answer-colorpicker-cmyk-input");
const hsvInputEl = document.getElementById("answer-colorpicker-hsv-input");
const hslInputEl = document.getElementById("answer-colorpicker-hsl-input");

// current color, kept in sync across all four representations
// NOTE(review): parseHsv/parseHsl/parseRgb/parseCmyk are not defined in this
// part of the file — presumably declared further down; verify
let hsv = parseHsv(hsvInputEl.value);
let hsl = parseHsl(hslInputEl.value);
let rgb = parseRgb(rgbInputEl.value);
let cmyk = parseCmyk(cmykInputEl.value);

function clamp(n, min, max) {
  return Math.max(min, Math.min(max, n));
}

// setters: clamp, store one representation, derive the others, then redraw
function setHsv(h, s, v) {
  h = clamp(h, 0, 360);
  s = clamp(s, 0, 1);
  v = clamp(v, 0, 1);

  hsv = [h, s, v];
  hsl = hsvToHsl(...hsv);
  rgb = hsvToRgb(...hsv);
  cmyk = rgbToCmyk(...rgb);
  updateColorPreview();
}
function setHsl(h, s, l) {
  h = clamp(h, 0, 360);
  s = clamp(s, 0, 1);
  l = clamp(l, 0, 1);

  hsl = [h, s, l];
  hsv = hslToHsv(...hsl);
  rgb = hsvToRgb(...hsv);
  cmyk = rgbToCmyk(...rgb);
  updateColorPreview();
}
function setRgb(r, g, b) {
  r = clamp(r, 0, 1);
  g = clamp(g, 0, 1);
  b = clamp(b, 0, 1);

  rgb = [r, g, b];
hsl = rgbToHsl(...rgb); 133 | hsv = hslToHsv(...hsl); 134 | cmyk = rgbToCmyk(...rgb); 135 | updateColorPreview(); 136 | } 137 | function setCmyk(c, m, y, k) { 138 | c = clamp(c, 0, 1); 139 | m = clamp(m, 0, 1); 140 | y = clamp(y, 0, 1); 141 | k = clamp(k, 0, 1); 142 | 143 | cmyk = [c, m, y, k]; 144 | rgb = cmykToRgb(...cmyk); 145 | hsl = rgbToHsl(...rgb); 146 | hsv = rgbToHsv(...rgb); 147 | updateColorPreview(); 148 | } 149 | 150 | let mouseInCanvas = false; 151 | function canvasMouseDown(clientX, clientY) { 152 | activeInput = null; 153 | updatePicker(clientX, clientY); 154 | mouseInCanvas = true; 155 | } 156 | function canvasMouseMove(clientX, clientY) { 157 | activeInput; 158 | if (mouseInCanvas) updatePicker(clientX, clientY); 159 | } 160 | function canvasMouseUp() { 161 | mouseInCanvas = false; 162 | } 163 | canvasEl.addEventListener("mousedown", (e) => { 164 | canvasMouseDown(e.clientX, e.clientY); 165 | }); 166 | canvasEl.addEventListener("touchstart", (e) => { 167 | canvasMouseDown(e.touches[0].clientX, e.touches[0].clientY); 168 | }); 169 | document.addEventListener("mouseup", () => { 170 | canvasMouseUp(); 171 | }); 172 | document.addEventListener("touchend", () => { 173 | canvasMouseUp(); 174 | }); 175 | document.addEventListener("mousemove", (e) => { 176 | canvasMouseMove(e.clientX, e.clientY); 177 | }); 178 | document.addEventListener("touchmove", (e) => { 179 | canvasMouseMove(e.touches[0].clientX, e.touches[0].clientY); 180 | }); 181 | 182 | let mouseInSlider = false; 183 | function sliderMouseDown(clientX) { 184 | updateHuePicker(clientX); 185 | mouseInSlider = true; 186 | } 187 | function sliderMouseMove(clientX) { 188 | if (mouseInSlider) updateHuePicker(clientX); 189 | } 190 | function sliderMouseUp() { 191 | mouseInSlider = false; 192 | } 193 | sliderEl.addEventListener("mousedown", (e) => { 194 | sliderMouseDown(e.clientX); 195 | }); 196 | sliderEl.addEventListener("touchstart", (e) => { 197 | sliderMouseDown(e.touches[0].clientX); 198 | }); 
199 | huepickerEl.addEventListener("mousedown", (e) => { 200 | sliderMouseDown(e.clientX); 201 | }); 202 | huepickerEl.addEventListener("touchstart", (e) => { 203 | sliderMouseDown(e.touches[0].clientX); 204 | }); 205 | document.addEventListener("mouseup", () => { 206 | sliderMouseUp(); 207 | }); 208 | document.addEventListener("touchend", () => { 209 | sliderMouseUp(); 210 | }); 211 | document.addEventListener("mousemove", (e) => { 212 | sliderMouseMove(e.clientX); 213 | }); 214 | document.addEventListener("touchmove", (e) => { 215 | sliderMouseMove(e.touches[0].clientX); 216 | }); 217 | 218 | function updatePicker(clientX, clientY) { 219 | const rect = canvasEl.getBoundingClientRect(); 220 | let x = clientX - rect.left; 221 | let y = clientY - rect.top; 222 | if (x < 0) x = 0; 223 | if (y < 0) y = 0; 224 | if (x > rect.width) x = rect.width; 225 | if (y > rect.height) y = rect.height; 226 | 227 | pickerEl.style.left = `${(x / rect.width) * 100}%`; 228 | pickerEl.style.top = `${(y / rect.height) * 100}%`; 229 | 230 | const hue = hsv[0]; 231 | setHsv(hue, x / rect.width, 1 - y / rect.height); 232 | } 233 | 234 | function updateHuePicker(clientX) { 235 | const rect = sliderEl.getBoundingClientRect(); 236 | let x = clientX - rect.left; 237 | if (x < 0) x = 0; 238 | if (x > rect.width) x = rect.width; 239 | 240 | huepickerEl.style.left = `${(x / rect.width) * 100}%`; 241 | 242 | const hue = (x / rect.width) * 360; 243 | setHsv(hue, hsv[1], hsv[2]); 244 | } 245 | 246 | function updateColorPreview() { 247 | const [r, g, b] = rgb; 248 | const [hue, saturation, value] = hsv; 249 | 250 | const color = `rgb(${r * 255}, ${g * 255}, ${b * 255})`; 251 | pickerEl.style.backgroundColor = color; 252 | previewEl.style.backgroundColor = color; 253 | 254 | const hueColor = `hsl(${hue}, 100%, 50%)`; 255 | huepickerEl.style.backgroundColor = hueColor; 256 | canvasHueSvgEl.style.setProperty("stop-color", hueColor); 257 | 258 | pickerEl.style.left = `${saturation * 100}%`; 259 | 
pickerEl.style.top = `${(1 - value) * 100}%`; 260 | 261 | if (activeInput !== hexInputEl) { 262 | hexInputEl.value = 263 | "#" + 264 | rgb 265 | .map((c) => 266 | Math.round(c * 255) 267 | .toString(16) 268 | .padStart(2, "0") 269 | ) 270 | .join(""); 271 | } 272 | if (activeInput !== rgbInputEl) { 273 | rgbInputEl.value = rgb.map((c) => Math.round(c * 255)).join(", "); 274 | } 275 | if (activeInput !== cmykInputEl) { 276 | const cmykPercent = cmyk.map((c) => Math.round(c * 100)); 277 | cmykInputEl.value = `${cmykPercent[0]}%, ${cmykPercent[1]}%, ${cmykPercent[2]}%, ${cmykPercent[3]}%`; 278 | } 279 | if (activeInput !== hsvInputEl) { 280 | const hAngle = Math.round(hsv[0]); 281 | hsvInputEl.value = `${hAngle}°, ${Math.round(hsv[1] * 100)}%, ${Math.round( 282 | hsv[2] * 100 283 | )}%`; 284 | } 285 | if (activeInput !== hslInputEl) { 286 | hslInputEl.value = `${Math.round(hsl[0])}°, ${Math.round( 287 | hsl[1] * 100 288 | )}%, ${Math.round(hsl[2] * 100)}%`; 289 | } 290 | } 291 | 292 | function parseHex(value) { 293 | value = hexInputEl.value.replace("#", ""); 294 | if (value.length === 6) { 295 | const r = parseInt(value.slice(0, 2), 16) / 255; 296 | const g = parseInt(value.slice(2, 4), 16) / 255; 297 | const b = parseInt(value.slice(4, 6), 16) / 255; 298 | return [r, g, b]; 299 | } else if (value.length === 3) { 300 | const r = parseInt(value[0] + value[0], 16) / 255; 301 | const g = parseInt(value[1] + value[1], 16) / 255; 302 | const b = parseInt(value[2] + value[2], 16) / 255; 303 | return [r, g, b]; 304 | } 305 | } 306 | function setFromHexInput() { 307 | setRgb(...parseHex(hexInputEl.value)); 308 | } 309 | hexInputEl.addEventListener("input", setFromHexInput); 310 | 311 | function parseRgb(value) { 312 | return value.split(",").map((c) => parseInt(c) / 255); 313 | } 314 | function setFromRgbInput() { 315 | setRgb(...parseRgb(rgbInputEl.value)); 316 | } 317 | rgbInputEl.addEventListener("input", setFromRgbInput); 318 | 319 | function parseCmyk(value) { 320 | 
return value.split(",").map((c) => parseInt(c) / 100); 321 | } 322 | function setFromCmykInput() { 323 | setCmyk(...parseCmyk(cmykInputEl.value)); 324 | } 325 | cmykInputEl.addEventListener("input", setFromCmykInput); 326 | 327 | function parseHsv(value) { 328 | value = hsvInputEl.value.split(",").map((c) => parseInt(c)); 329 | value[1] /= 100; 330 | value[2] /= 100; 331 | return value; 332 | } 333 | function setFromHsvInput() { 334 | setHsv(...parseHsv(hsvInputEl.value)); 335 | } 336 | hsvInputEl.addEventListener("input", setFromHsvInput); 337 | 338 | function parseHsl(value) { 339 | value = hslInputEl.value.split(",").map((c) => parseInt(c)); 340 | value[1] /= 100; 341 | value[2] /= 100; 342 | return value; 343 | } 344 | function setFromHslInput() { 345 | setHsl(...parseHsl(hslInputEl.value)); 346 | } 347 | hslInputEl.addEventListener("input", setFromHslInput); 348 | 349 | updateColorPreview(); 350 | -------------------------------------------------------------------------------- /src/web/assets/style.css: -------------------------------------------------------------------------------- 1 | :root { 2 | /* body background */ 3 | --bg-1: #0b0e14; 4 | /* background of the content */ 5 | --bg-2: #0d1017; 6 | /* input suggestions background */ 7 | --bg-3: #0f131a; 8 | /* mostly borders */ 9 | --bg-4: #234; 10 | 11 | /* main text color */ 12 | --fg-1: #bfbdb6; 13 | /* search result description */ 14 | --fg-2: #bba; 15 | --fg-3: #998; 16 | 17 | /* focus outline */ 18 | --accent: #e6b450; 19 | 20 | --link: #29e; 21 | --link-visited: #92e; 22 | 23 | /* green, success */ 24 | --positive: #7fd962; 25 | /* red, error */ 26 | --negative: #d95757; 27 | 28 | --syntax-string: #aad94c; 29 | --syntax-special: #e6b673; 30 | --syntax-constant: #d2a6ff; 31 | --syntax-comment: #acb6bf8c; 32 | --syntax-func: #ffb454; 33 | } 34 | 35 | ::selection { 36 | background-color: var(--accent); 37 | color: var(--bg-3); 38 | } 39 | 40 | html { 41 | height: 100%; 42 | } 43 | body { 44 | 
font-family: monospace; 45 | background-color: var(--bg-1); 46 | color: var(--fg-1); 47 | margin: 0; 48 | line-height: 1.2; 49 | height: 100%; 50 | } 51 | 52 | .settings-link, 53 | .settings-link:visited { 54 | position: absolute; 55 | top: 1em; 56 | right: 1em; 57 | color: var(--link); 58 | } 59 | .version-info { 60 | position: absolute; 61 | bottom: 1em; 62 | right: 1em; 63 | } 64 | 65 | .main-container { 66 | /* enough space for the infobox */ 67 | max-width: 73.5rem; 68 | margin: 0 auto; 69 | word-break: break-word; 70 | } 71 | main { 72 | max-width: 40rem; 73 | /* margin: 0 0 0 10rem; */ 74 | padding: 1rem 0.5rem; 75 | background-color: var(--bg-2); 76 | min-height: 100%; 77 | } 78 | .search-images > main { 79 | /* image search uses 100% width */ 80 | max-width: 100%; 81 | } 82 | .main-container.search-images { 83 | max-width: none; 84 | } 85 | @media screen and (max-width: 74rem) { 86 | /* small screens */ 87 | .main-container { 88 | margin: 0 auto; 89 | max-width: 40rem; 90 | } 91 | } 92 | input, 93 | textarea, 94 | select { 95 | font-family: monospace; 96 | background-color: var(--bg-2); 97 | color: var(--fg-1); 98 | border: 1px solid var(--bg-4); 99 | font-size: inherit; 100 | padding: 0.25rem; 101 | } 102 | input:focus-visible { 103 | outline: none; 104 | border-color: var(--accent); 105 | } 106 | :focus-visible { 107 | outline: 1px solid var(--accent); 108 | } 109 | input[type="submit"] { 110 | cursor: pointer; 111 | } 112 | a { 113 | color: var(--link); 114 | text-decoration: none; 115 | } 116 | a:visited { 117 | color: var(--link-visited); 118 | } 119 | pre { 120 | white-space: pre-wrap; 121 | } 122 | blockquote { 123 | margin: 0; 124 | padding-left: 0.5em; 125 | border-left: 0.25em solid var(--bg-4); 126 | } 127 | 128 | /* index page */ 129 | .main-container.index-page { 130 | display: flex; 131 | flex-direction: column; 132 | min-height: 100%; 133 | height: 100%; 134 | justify-content: center; 135 | margin: 0 auto; 136 | padding: 0 0.5em; 137 | 
text-align: center; 138 | max-width: 30em; 139 | } 140 | h1 { 141 | margin-top: 0; 142 | } 143 | 144 | /* settings page */ 145 | .settings-page .back-to-index-button { 146 | bottom: 0.5em; 147 | position: relative; 148 | color: var(--link); 149 | } 150 | .settings-form select { 151 | display: block; 152 | } 153 | #save-settings-button { 154 | margin-top: 1em; 155 | display: block; 156 | } 157 | #custom-css { 158 | tab-size: 2; 159 | width: calc(100% - 0.5rem); 160 | height: 20rem; 161 | border: 0; 162 | outline: 1px solid var(--bg-4); 163 | } 164 | 165 | /* header */ 166 | .search-form { 167 | margin-bottom: 1rem; 168 | display: flex; 169 | gap: 0.5rem; 170 | } 171 | #search-input { 172 | flex: 1; 173 | } 174 | .main-container:not(.index-page) #search-input { 175 | max-width: 30em; 176 | } 177 | #search-input-suggestions { 178 | position: absolute; 179 | text-align: left; 180 | margin-top: calc(1.9em + 1px); 181 | background: var(--bg-3); 182 | padding: 0.1em 0 0.3em 0; 183 | border: 1px solid var(--bg-4); 184 | border-top: transparent; 185 | z-index: 10; 186 | } 187 | .search-input-suggestion { 188 | cursor: pointer; 189 | padding: 0.3em 0.3em; 190 | white-space: nowrap; 191 | } 192 | .search-input-suggestion.focused, 193 | .search-input-suggestion:hover { 194 | background: var(--bg-4); 195 | } 196 | 197 | /* search tabs (like images, if enabled) */ 198 | .search-tabs { 199 | display: flex; 200 | gap: 0.5rem; 201 | margin-bottom: 0.5rem; 202 | margin-top: -0.5rem; 203 | } 204 | .search-tab { 205 | border: 1px solid var(--bg-4); 206 | padding: 0.25rem; 207 | } 208 | a.search-tab { 209 | color: var(--link); 210 | } 211 | 212 | /* search result */ 213 | .search-result { 214 | padding-top: 1rem; 215 | border-top: 1px solid var(--bg-4); 216 | font-size: 1rem; 217 | } 218 | .search-result-anchor { 219 | display: block; 220 | word-break: break-all; 221 | } 222 | .search-result-url { 223 | margin: 0; 224 | font-size: 0.8rem; 225 | color: var(--fg-3); 226 | } 227 | 
.search-result-title { 228 | margin: 0; 229 | font-size: 1rem; 230 | } 231 | .search-result-description { 232 | margin: 0; 233 | font-size: 0.8em; 234 | color: var(--fg-2); 235 | } 236 | 237 | /* engine list */ 238 | .engine-list { 239 | opacity: 0.5; 240 | justify-content: end; 241 | display: flex; 242 | gap: 0.5em; 243 | font-size: 0.8rem; 244 | } 245 | 246 | /* featured snippet */ 247 | .featured-snippet { 248 | margin-bottom: 1rem; 249 | border: 1px solid var(--bg-4); 250 | padding: 0.5rem; 251 | font-size: 1.2rem; 252 | } 253 | .featured-snippet .search-result-description { 254 | margin-bottom: 1rem; 255 | white-space: pre-wrap; 256 | } 257 | 258 | /* progress update */ 259 | .progress-updates { 260 | margin-bottom: 1rem; 261 | border: 1px solid var(--bg-4); 262 | padding: 0.5rem; 263 | min-height: 5em; 264 | } 265 | .progress-update { 266 | margin: 0; 267 | white-space: pre-wrap; 268 | } 269 | .progress-update-time { 270 | opacity: 0.5; 271 | } 272 | .progress-update-done { 273 | color: var(--positive); 274 | font-weight: bold; 275 | } 276 | .progress-update-error { 277 | color: var(--negative); 278 | font-weight: bold; 279 | } 280 | 281 | /* answer */ 282 | .answer { 283 | margin-bottom: 1rem; 284 | border: 1px solid var(--bg-4); 285 | padding: 0.5rem; 286 | word-break: break-word; 287 | } 288 | .answer h3 { 289 | margin: 0; 290 | font-weight: normal; 291 | font-size: 1.2rem; 292 | } 293 | 294 | /* styles that are somewhat answer-specific but get reused across other styles sometimes */ 295 | .answer-query { 296 | margin: 0; 297 | opacity: 0.5; 298 | } 299 | .answer-comment { 300 | color: var(--syntax-comment); 301 | font-weight: normal; 302 | } 303 | 304 | /* styles for specific answers */ 305 | .answer-calc-constant { 306 | color: var(--syntax-constant); 307 | white-space: pre-wrap; 308 | } 309 | .answer-calc-string { 310 | color: var(--syntax-string); 311 | } 312 | .answer-calc-special { 313 | color: var(--syntax-special); 314 | } 315 | .answer-calc-func { 
color: var(--syntax-func);
}

.answer-dictionary-word,
.answer-thesaurus-word {
  margin-top: 0;
}
.answer-dictionary-part-of-speech {
  font-style: italic;
  opacity: 0.8;
}
.answer-dictionary-example {
  margin-bottom: 0.5em;
}

.answer-thesaurus-item:not(:last-child) {
  border-bottom: 1px solid var(--bg-4);
  margin-bottom: 1rem;
  padding-bottom: 1rem;
}
.answer-thesaurus-word-description {
  font-style: italic;
  opacity: 0.8;
}
.answer-thesaurus-part-of-speech {
  font-weight: bold;
}
.answer-thesaurus-as-in {
  font-style: italic;
}
h3.answer-thesaurus-category-title {
  margin-top: 0.5rem;
}
.answer-thesaurus-strongest {
  opacity: 1;
}
.answer-thesaurus-strong {
  opacity: 0.8;
}
.answer-thesaurus-weak {
  opacity: 0.6;
}
.answer-thesaurus-list {
  margin: 0;
  padding: 0;
  display: flex;
  flex-wrap: wrap;
  gap: 0.8em;
  list-style-type: none;
}
.answer-thesaurus-list a {
  text-decoration: underline;
}
.answer-notepad {
  width: calc(100% - 4px);
  height: fit-content;
  /* "show" is not a valid overflow keyword (it was silently ignored);
     "visible" is what browsers were rendering anyway */
  overflow-y: visible;
  background-color: transparent;
  color: white;
  border: none;
  outline: none;
  min-height: 4em;
  font-size: 12px;
  resize: none;
}

.answer-colorpicker-preview-container {
  display: flex;
  height: 228px;
}
.answer-colorpicker-preview {
  width: 204px;
  max-width: 33%;
}
.answer-colorpicker-picker-container {
  position: absolute;
  pointer-events: none;
  width: 100%;
  height: 100%;
}
.answer-colorpicker-picker,
.answer-colorpicker-huepicker {
  position: absolute;
  width: 1rem;
  height: 1rem;
  transform: translate(-0.5rem, -0.5rem);
402 | border-radius: 50%; 403 | border: 2px solid #fff; 404 | 405 | touch-action: none; 406 | } 407 | .answer-colorpicker-canvas-container { 408 | flex: 1; 409 | position: relative; 410 | } 411 | .answer-colorpicker-canvas { 412 | height: 100%; 413 | width: 100%; 414 | 415 | touch-action: none; 416 | } 417 | .answer-colorpicker-slider-container { 418 | margin: 1rem; 419 | position: relative; 420 | height: 1rem; 421 | 422 | touch-action: none; 423 | } 424 | .answer-colorpicker-slider { 425 | height: 100%; 426 | width: 100%; 427 | } 428 | .answer-colorpicker-huepicker { 429 | transform: translate(-0.5rem, -50%); 430 | top: 50%; 431 | } 432 | .answer-colorpicker label { 433 | display: block; 434 | width: fit-content; 435 | } 436 | .answer-colorpicker-hex-input-container { 437 | text-align: center; 438 | margin-bottom: 0.5rem; 439 | } 440 | .answer-colorpicker-hex-input-container label { 441 | margin: 0 auto; 442 | } 443 | #answer-colorpicker-hex-input { 444 | width: 100%; 445 | text-align: center; 446 | } 447 | .answer-colorpicker-other-inputs { 448 | display: flex; 449 | gap: 0.5rem; 450 | } 451 | .answer-colorpicker-input-container { 452 | display: flex; 453 | } 454 | .answer-colorpicker-other-inputs input { 455 | width: 100%; 456 | } 457 | 458 | /* infobox */ 459 | .infobox { 460 | margin-bottom: 1rem; 461 | border: 1px solid var(--bg-4); 462 | padding: 0.5rem; 463 | position: absolute; 464 | top: 3.5rem; 465 | max-width: 30rem; 466 | margin-left: 42rem; 467 | } 468 | @media screen and (max-width: 74rem) { 469 | /* small screens */ 470 | .infobox { 471 | position: static; 472 | margin: 0; 473 | max-width: unset; 474 | margin-bottom: 1rem; 475 | } 476 | 477 | .postsearch-infobox { 478 | /* displaying these properly is too hard so don't */ 479 | display: none; 480 | } 481 | } 482 | .infobox h2 { 483 | margin-top: 0; 484 | margin-bottom: 0.5em; 485 | } 486 | .infobox p { 487 | margin: 0; 488 | } 489 | .infobox pre:not(.infobox-github-readme) { 490 | border: 1px solid 
var(--bg-4); 491 | padding: 0.5rem; 492 | display: block; 493 | } 494 | .infobox pre > code { 495 | font-weight: normal; 496 | } 497 | .infobox code { 498 | font-weight: bold; 499 | } 500 | .infobox img { 501 | max-width: 100%; 502 | } 503 | .infobox-docs_rs-version { 504 | opacity: 0.5; 505 | font-weight: normal; 506 | font-size: 0.8em; 507 | } 508 | .infobox-github-readme .markdown-alert { 509 | padding-left: 0.5em; 510 | border-left: 0.25em solid var(--bg-4); 511 | } 512 | .infobox-github-readme .markdown-alert-title { 513 | font-weight: bold; 514 | } 515 | .postsearch-infobox p { 516 | margin-bottom: 1em; 517 | } 518 | .infobox-minecraft_wiki-article > .notaninfobox { 519 | display: none !important; 520 | } 521 | .noexcerpt, 522 | .navigation-not-searchable { 523 | display: none !important; 524 | } 525 | .mcw-mainpage-icon { 526 | display: inline-block; 527 | } 528 | 529 | /* image results */ 530 | .image-results { 531 | display: flex; 532 | flex-wrap: wrap; 533 | gap: 0.5rem; 534 | } 535 | .image-result { 536 | min-width: 12rem; 537 | position: relative; 538 | flex-grow: 1; 539 | overflow: hidden; 540 | } 541 | .image-result-img-container { 542 | margin: 0 auto; 543 | width: fit-content; 544 | } 545 | .image-result img { 546 | height: 10.3rem; 547 | width: auto; 548 | object-fit: scale-down; 549 | object-position: left; 550 | } 551 | .image-result-page-anchor { 552 | display: block; 553 | height: 2.25em; 554 | } 555 | .image-result-page-url { 556 | overflow: hidden; 557 | text-overflow: ellipsis; 558 | 559 | font-size: 0.8rem; 560 | 561 | white-space: nowrap; 562 | width: 100%; 563 | position: absolute; 564 | display: block; 565 | } 566 | .image-result-title { 567 | overflow: hidden; 568 | text-overflow: ellipsis; 569 | 570 | font-size: 0.85rem; 571 | 572 | white-space: nowrap; 573 | width: 100%; 574 | position: absolute; 575 | display: block; 576 | margin-top: 1em; 577 | } 578 | -------------------------------------------------------------------------------- 
/src/config.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::HashMap, 3 | fs, 4 | net::SocketAddr, 5 | path::Path, 6 | sync::{Arc, LazyLock}, 7 | }; 8 | 9 | use serde::Deserialize; 10 | use tracing::info; 11 | 12 | use crate::engines::Engine; 13 | 14 | impl Default for Config { 15 | fn default() -> Self { 16 | Config { 17 | bind: "0.0.0.0:28019".parse().unwrap(), 18 | api: false, 19 | ui: UiConfig { 20 | show_engine_list_separator: false, 21 | show_version_info: false, 22 | site_name: "metasearch".to_string(), 23 | show_settings_link: true, 24 | stylesheet_url: "".to_string(), 25 | stylesheet_str: "".to_string(), 26 | favicon_url: "".to_string(), 27 | show_autocomplete: true, 28 | }, 29 | image_search: ImageSearchConfig { 30 | enabled: false, 31 | show_engines: true, 32 | proxy: ImageProxyConfig { 33 | enabled: true, 34 | max_download_size: 10_000_000, 35 | }, 36 | }, 37 | engines: Arc::new(EnginesConfig::default()), 38 | urls: UrlsConfig { 39 | replace: vec![( 40 | HostAndPath::new("minecraft.fandom.com/wiki/"), 41 | HostAndPath::new("minecraft.wiki/w/"), 42 | )], 43 | weight: vec![], 44 | }, 45 | } 46 | } 47 | } 48 | 49 | impl Default for EnginesConfig { 50 | fn default() -> Self { 51 | use toml::value::Value; 52 | 53 | let mut map = HashMap::new(); 54 | // engines are enabled by default, so engines that aren't listed here are 55 | // enabled 56 | 57 | // main search engines 58 | map.insert(Engine::Google, EngineConfig::new().with_weight(1.05)); 59 | map.insert(Engine::Bing, EngineConfig::new().with_weight(1.0)); 60 | map.insert(Engine::Brave, EngineConfig::new().with_weight(1.25)); 61 | map.insert( 62 | Engine::Marginalia, 63 | EngineConfig::new().with_weight(0.15).with_extra( 64 | vec![( 65 | "args".to_string(), 66 | Value::Table( 67 | vec![ 68 | ("profile".to_string(), Value::String("corpo".to_string())), 69 | ("js".to_string(), Value::String("default".to_string())), 70 | 
("adtech".to_string(), Value::String("default".to_string())), 71 | ] 72 | .into_iter() 73 | .collect(), 74 | ), 75 | )] 76 | .into_iter() 77 | .collect(), 78 | ), 79 | ); 80 | 81 | // additional search engines 82 | map.insert( 83 | Engine::GoogleScholar, 84 | EngineConfig::new().with_weight(0.50).disabled(), 85 | ); 86 | map.insert( 87 | Engine::RightDao, 88 | EngineConfig::new().with_weight(0.10).disabled(), 89 | ); 90 | map.insert( 91 | Engine::Stract, 92 | EngineConfig::new().with_weight(0.15).disabled(), 93 | ); 94 | map.insert( 95 | Engine::Yep, 96 | EngineConfig::new().with_weight(0.10).disabled(), 97 | ); 98 | 99 | // calculators (give them a high weight so they're always the first thing in 100 | // autocomplete) 101 | map.insert(Engine::Numbat, EngineConfig::new().with_weight(10.0)); 102 | map.insert( 103 | Engine::Fend, 104 | EngineConfig::new().with_weight(10.0).disabled(), 105 | ); 106 | 107 | // other engines 108 | map.insert( 109 | Engine::Mdn, 110 | EngineConfig::new().with_extra( 111 | vec![("max_sections".to_string(), Value::Integer(1))] 112 | .into_iter() 113 | .collect(), 114 | ), 115 | ); 116 | 117 | Self { map } 118 | } 119 | } 120 | 121 | impl Default for EngineConfig { 122 | fn default() -> Self { 123 | Self { 124 | enabled: true, 125 | weight: 1.0, 126 | extra: Default::default(), 127 | } 128 | } 129 | } 130 | static DEFAULT_ENGINE_CONFIG_REF: LazyLock = LazyLock::new(EngineConfig::default); 131 | impl EngineConfig { 132 | pub fn new() -> Self { 133 | Self::default() 134 | } 135 | pub fn with_weight(self, weight: f64) -> Self { 136 | Self { weight, ..self } 137 | } 138 | pub fn disabled(self) -> Self { 139 | Self { 140 | enabled: false, 141 | ..self 142 | } 143 | } 144 | pub fn with_extra(self, extra: toml::Table) -> Self { 145 | Self { extra, ..self } 146 | } 147 | } 148 | 149 | // 150 | 151 | #[derive(Debug, Clone)] 152 | pub struct Config { 153 | pub bind: SocketAddr, 154 | /// Whether the JSON API should be accessible. 
155 | pub api: bool, 156 | pub ui: UiConfig, 157 | pub image_search: ImageSearchConfig, 158 | // wrapped in an arc to make Config cheaper to clone 159 | pub engines: Arc, 160 | pub urls: UrlsConfig, 161 | } 162 | 163 | #[derive(Deserialize, Debug)] 164 | pub struct PartialConfig { 165 | pub bind: Option, 166 | pub api: Option, 167 | pub ui: Option, 168 | pub image_search: Option, 169 | pub engines: Option, 170 | pub urls: Option, 171 | } 172 | 173 | impl Config { 174 | pub fn overlay(&mut self, partial: PartialConfig) { 175 | self.bind = partial.bind.unwrap_or(self.bind); 176 | self.api = partial.api.unwrap_or(self.api); 177 | self.ui.overlay(partial.ui.unwrap_or_default()); 178 | self.image_search 179 | .overlay(partial.image_search.unwrap_or_default()); 180 | if let Some(partial_engines) = partial.engines { 181 | let mut engines = self.engines.as_ref().clone(); 182 | engines.overlay(partial_engines); 183 | self.engines = Arc::new(engines); 184 | } 185 | self.urls.overlay(partial.urls.unwrap_or_default()); 186 | } 187 | } 188 | 189 | #[derive(Debug, Clone)] 190 | pub struct UiConfig { 191 | pub show_engine_list_separator: bool, 192 | pub show_version_info: bool, 193 | /// Settings are always accessible anyways, this just controls whether the 194 | /// link to them in the index page is visible. 
pub show_settings_link: bool,
    pub site_name: String,
    pub show_autocomplete: bool,
    pub stylesheet_url: String,
    pub stylesheet_str: String,
    pub favicon_url: String,
}

#[derive(Deserialize, Debug, Default)]
pub struct PartialUiConfig {
    pub show_engine_list_separator: Option<bool>,
    pub show_version_info: Option<bool>,
    pub show_settings_link: Option<bool>,
    pub show_autocomplete: Option<bool>,

    pub site_name: Option<String>,
    pub stylesheet_url: Option<String>,
    pub stylesheet_str: Option<String>,
    pub favicon_url: Option<String>,
}

impl UiConfig {
    /// Applies every field that is present in `partial` on top of `self`.
    pub fn overlay(&mut self, partial: PartialUiConfig) {
        self.show_engine_list_separator = partial
            .show_engine_list_separator
            .unwrap_or(self.show_engine_list_separator);
        self.show_version_info = partial.show_version_info.unwrap_or(self.show_version_info);
        self.show_settings_link = partial
            .show_settings_link
            .unwrap_or(self.show_settings_link);
        self.show_autocomplete = partial.show_autocomplete.unwrap_or(self.show_autocomplete);
        // assign only when present instead of `unwrap_or(self.x.clone())`,
        // which cloned the existing strings even when they were kept
        if let Some(site_name) = partial.site_name {
            self.site_name = site_name;
        }
        if let Some(stylesheet_url) = partial.stylesheet_url {
            self.stylesheet_url = stylesheet_url;
        }
        if let Some(stylesheet_str) = partial.stylesheet_str {
            self.stylesheet_str = stylesheet_str;
        }
        if let Some(favicon_url) = partial.favicon_url {
            self.favicon_url = favicon_url;
        }
    }
}

#[derive(Debug, Clone)]
pub struct ImageSearchConfig {
    pub enabled: bool,
    pub show_engines: bool,
    pub proxy: ImageProxyConfig,
}

#[derive(Deserialize, Debug, Default)]
pub struct PartialImageSearchConfig {
    pub enabled: Option<bool>,
    pub show_engines: Option<bool>,
    pub proxy: Option<PartialImageProxyConfig>,
}

impl ImageSearchConfig {
    /// Applies every field that is present in `partial` on top of `self`.
    pub fn overlay(&mut self, partial: PartialImageSearchConfig) {
        self.enabled =
partial.enabled.unwrap_or(self.enabled);
        self.show_engines = partial.show_engines.unwrap_or(self.show_engines);
        self.proxy.overlay(partial.proxy.unwrap_or_default());
    }
}

#[derive(Debug, Clone)]
pub struct ImageProxyConfig {
    /// Whether we should proxy remote images through our server. This is mostly
    /// a privacy feature.
    pub enabled: bool,
    /// The maximum size of an image that can be proxied. This is in bytes.
    pub max_download_size: u64,
}

#[derive(Deserialize, Debug, Default)]
pub struct PartialImageProxyConfig {
    pub enabled: Option<bool>,
    pub max_download_size: Option<u64>,
}

impl ImageProxyConfig {
    /// Applies every field that is present in `partial` on top of `self`.
    pub fn overlay(&mut self, partial: PartialImageProxyConfig) {
        self.enabled = partial.enabled.unwrap_or(self.enabled);
        self.max_download_size = partial.max_download_size.unwrap_or(self.max_download_size);
    }
}

#[derive(Debug, Clone)]
pub struct EnginesConfig {
    pub map: HashMap<Engine, EngineConfig>,
}

#[derive(Deserialize, Debug, Default)]
pub struct PartialEnginesConfig {
    #[serde(flatten)]
    pub map: HashMap<Engine, PartialDefaultableEngineConfig>,
}

/// An engine entry in the config file is either a bare `true`/`false` toggle
/// or a full per-engine table.
#[derive(Deserialize, Clone, Debug)]
#[serde(untagged)]
pub enum PartialDefaultableEngineConfig {
    Boolean(bool),
    Full(PartialEngineConfig),
}

impl EnginesConfig {
    /// Applies every engine entry in `partial` on top of `self`.
    pub fn overlay(&mut self, partial: PartialEnginesConfig) {
        for (key, value) in partial.map {
            let full = match value {
                // a bare boolean only toggles `enabled`
                PartialDefaultableEngineConfig::Boolean(enabled) => PartialEngineConfig {
                    enabled: Some(enabled),
                    ..Default::default()
                },
                PartialDefaultableEngineConfig::Full(full) => full,
            };
            // entry() does one lookup instead of the previous get_mut + insert
            self.map
                .entry(key)
                .or_insert_with(EngineConfig::default)
                .overlay(full);
        }
317 | } 318 | 319 | pub fn get(&self, engine: Engine) -> &EngineConfig { 320 | self.map.get(&engine).unwrap_or(&DEFAULT_ENGINE_CONFIG_REF) 321 | } 322 | } 323 | 324 | #[derive(Debug, Clone)] 325 | pub struct EngineConfig { 326 | pub enabled: bool, 327 | /// The priority of this engine relative to the other engines. 328 | pub weight: f64, 329 | /// Per-engine configs. These are parsed at request time. 330 | pub extra: toml::Table, 331 | } 332 | 333 | #[derive(Deserialize, Clone, Debug, Default)] 334 | pub struct PartialEngineConfig { 335 | pub enabled: Option, 336 | pub weight: Option, 337 | #[serde(flatten)] 338 | pub extra: toml::Table, 339 | } 340 | 341 | impl EngineConfig { 342 | pub fn overlay(&mut self, partial: PartialEngineConfig) { 343 | self.enabled = partial.enabled.unwrap_or(self.enabled); 344 | self.weight = partial.weight.unwrap_or(self.weight); 345 | self.extra.extend(partial.extra); 346 | } 347 | } 348 | 349 | impl Config { 350 | pub fn read_or_create(config_path: &Path) -> eyre::Result { 351 | let mut config = Config::default(); 352 | 353 | if !config_path.exists() { 354 | info!("No config found, creating one at {config_path:?}"); 355 | let default_config_str = include_str!("../config-default.toml"); 356 | if let Some(parent_path) = config_path.parent() { 357 | let _ = fs::create_dir_all(parent_path); 358 | } 359 | fs::write(config_path, default_config_str)?; 360 | } 361 | 362 | let given_config = toml::from_str::(&fs::read_to_string(config_path)?)?; 363 | config.overlay(given_config); 364 | Ok(config) 365 | } 366 | } 367 | 368 | #[derive(Debug, Clone, PartialEq)] 369 | pub struct HostAndPath { 370 | pub host: String, 371 | pub path: String, 372 | } 373 | impl HostAndPath { 374 | pub fn new(s: &str) -> Self { 375 | let (host, path) = s.split_once('/').unwrap_or((s, "")); 376 | Self { 377 | host: host.to_owned(), 378 | path: path.to_owned(), 379 | } 380 | } 381 | } 382 | 383 | #[derive(Debug, Clone)] 384 | pub struct UrlsConfig { 385 | pub replace: 
Vec<(HostAndPath, HostAndPath)>,
    pub weight: Vec<(HostAndPath, f64)>,
}
#[derive(Deserialize, Debug, Default)]
pub struct PartialUrlsConfig {
    #[serde(default)]
    pub replace: HashMap<String, String>,
    #[serde(default)]
    pub weight: HashMap<String, f64>,
}
impl UrlsConfig {
    /// Applies the replacement and weight rules in `partial` on top of `self`.
    pub fn overlay(&mut self, partial: PartialUrlsConfig) {
        for (from, to) in partial.replace {
            let from = HostAndPath::new(&from);
            if to.is_empty() {
                // setting the value to an empty string removes it; if the rule
                // isn't present there's nothing to remove (the previous
                // `index.unwrap()` panicked in that case)
                if let Some(index) = self.replace.iter().position(|(u, _)| u == &from) {
                    // swap_remove is fine because the order of this vec doesn't matter
                    self.replace.swap_remove(index);
                }
            } else {
                let to = HostAndPath::new(&to);
                self.replace.push((from, to));
            }
        }

        for (url, weight) in partial.weight {
            let url = HostAndPath::new(&url);
            self.weight.push((url, weight));
        }

        // sort by length so that more specific checks are done first
        self.weight.sort_by(|(a, _), (b, _)| {
            let a_len = a.path.len() + a.host.len();
            let b_len = b.path.len() + b.host.len();
            b_len.cmp(&a_len)
        });
    }
}
--------------------------------------------------------------------------------